diff --git a/.gitattributes b/.gitattributes index 6e6f5012e409a3b7a38e8c21bb68ab25d74f799e..e9077267405898600770c000fb156cbd0d10c12b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -297,3 +297,99 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text 619m2b72b7/evaluation/generation/examples.619m2b72b7_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text 619m2b72b7/evaluation/generation/examples.619m2b72b7_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text 619m2b71b5/evaluation/generation/examples.619m2b71b5_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs 
diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text 
+421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_2.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text 
+421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_5.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_0.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_4.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text 
+421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_0.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_1.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text 
+146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_3.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_2.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_0.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_1.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text 
+146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_3.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_4.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_3.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_5.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_2.jsonl filter=lfs diff=lfs merge=lfs -text +146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_1.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_5.jsonl filter=lfs diff=lfs merge=lfs -text +421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_4.jsonl filter=lfs diff=lfs merge=lfs -text diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..31a78fc54e4ba9cfe1272f1348d5b0c1af141c22 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27db7a26987fd79c14f9112deca512c0532e9b3d2346e642b62fed184a527e8f +size 27478295 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..088fb033f6aa68010d7f38b11df00724ec49bed8 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cda7269f256d1b46037a9eee4065a94b9188937ea8bdbb91788c66eb616bf29 +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..29da14563781838fabc065a57d6f8c77b12282dd --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f357c049a721e739455ab0f1b9427d7beb5e74d56e626a8ff3ee27a3c8fa4ec7 +size 27478242 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..efb396eb2245431ea608f804168d916342773013 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df0b314e872d8764fd84d304789985ab33332291b6fbc1f6fdcbd59a191f859d +size 27478242 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..53294aae6eaa5803cd8bbd08d138bdcfb0c45c3b --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73ea4721cb9be127d5678f55ccf10a6b3249819755e11ffd0efdf612b8585673 +size 27478178 diff --git 
a/146m14b100m/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6861c2df691c502ce0e54dfeddfd3fe3de43e0c --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b028d07aa758516825d4406e3635c9f7e9fa1ea98e831ed1bc3538fc686f0e +size 27478370 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..75b63b9213f2beb66ae31ca8d0cb8546f1a63d5b --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5df472ec4d2557b719af8df05734629ee49ce9efce7bf9b02303c9a1b2cfb40d +size 27478178 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..eccbba039a105064d2481fcd0b416398e64f59ef --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ffc40aff848938f6f4e525f95c06c946e4b4b279614b385db1b7c45dfcf36af +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5cda955b4c352345adf50574c3b055a6864955c6 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:9adf4852bb03d30d55c2aa2f5f3bf16b587e870a72a662e78b0a987556c7a882 +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcc209a646d3f1ace5a3eb0ef3bff1bfb3ce1a39 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a22657dc7471da76df7dc953ba62ecad75dda125c6b6a349bf68552e5eff99fb +size 27478242 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f26d192139a15a8063013674c634d750b514074b --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff1ee502adcaf704ecc9b6de3df7b30ca3f2e5c78151015b60a8a93acc22ad0d +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..28a0603a33bb5e83cb334e162922154d1ac78512 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7ea63603e615e8f3e04767bd016f337d500567a83a7e66d1c462f4d33795e0e +size 27478231 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecbf1cf1711d6fd501d91ca2dc7a5a08cc3274a8 --- 
/dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36439cc0aeec0fc88d6e9dc98a21cb7b212b5429942acf6bd96fb7672e265ac +size 27478178 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e41f6f3fb819e3724744c69419891b7c742a437 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:075f9e025b58df3acbe237cc63f10475a53ebe5a1ca7b784ee487788bdddb885 +size 27478242 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a944affd150875e3a211210080c1769fb91735ed --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1998f53ac117d1affa82228b8c47cfc2b5f904d66dd2f8044049734cb79678e +size 27478242 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c632c980389b8feb018464e997c223617d49e0e --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4c49bc8061bc8374a0efd58863de7e69a38270ef7bb1861fb91cd800053326c +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt 
new file mode 100644 index 0000000000000000000000000000000000000000..1bbe30ed2b9ad5e00144b5552b5625be21810c2a --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68419897504d2ea6c0155baa47bf01ab0b29fb082f7e25d1a4adf4dab6bc951e +size 27478178 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fda0a97ec6e1a1252b15d4dc0fc3ad723b51b99 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44469407a92e0c669944aa0842609a6e493ac9f0f275bb9ee57168893cdb9769 +size 27478370 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7755584b70a851e9e6acf57a43f081ac5215b562 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c71ff77233e83a8a426313c75abac51625b2b303fbb7bff324a6b1e1e1dc47e +size 27478178 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3141189da59e8384bdfb705e735f8459f5ed1bd0 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:669a0960221fc39a00ce6f1874b4e1d0babc59d6a12bd865d39a13aa538b74c9 +size 27478370 diff --git 
a/146m14b100m/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1f0d0aea8e001ba8af2ab7593662605f7c15776 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7adee1bbe58afc1f240f72d14c2e7e29f1f5ecdb1222f9e9d9a02404c77cac8c +size 27478242 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1cd6e1b731efc5b51f5a1841d574ab22f36a25c --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc07ff57b8be3b6988ee2cc79420f09b449d13ae9928d226f8d6df01e813c830 +size 27478370 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..381681be445957af89cc532ba0307f331b87058d --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:369e9589c953ff63e42dbc4632fb4692f3a12f1ad20c9ce0624420f986f0319d +size 27478231 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f072f782835b6228cac2c42f711de1a82f516757 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c0f3a4f39b3d4a72e3ef4c45b497ae3b2243e47ea80037f0c991709d4a8b3812 +size 27478242 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fba3c2164a9801bab7452d32066a298bdc77486 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26384b2ef6a01ac0a685940f0b7f9cd71fcbf55d0e36165a2a4a0b4283b18814 +size 27478370 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e82fe0eebc26fe7aac3a5e1ddfc0416327876315 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8c60b7624200f22a289a72cc83934cf4489b072f4250ff0bd0ee2ee51f6bf8a +size 27478178 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..47c84c9acd458e4922f6e0c03007fea7432e222e --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d77c688543779a3438fd139b49a20fea6950525cbffbacc5f945703595608f8d +size 27478370 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f5e65b97d996e6aaa2d6de47673aac8ae72af8c --- 
/dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:701330e340ded122ea6e31f67dee19c1f35b27b1c43d1b6e6f8888f621b60a6e +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f01fdf99f3fecaf54734d8a54ac5882446b63bf1 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8708c2c9209f9372b2aabb8835d5fdba38c47b632afda62fb91434d82c12a8b +size 27478242 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9083f814fe3a044fa9f7b42b952377601bfae287 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0be3b2dc55c255223ab545c0c5e57e1f1062457280571cf9b260067a101dad61 +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e8200795fa2cd432bd0c85ec970eed3d4cbfef1 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25dd049337b3f6175944cb9e69e60110fe311ea38f52d363f1637b247a74440e +size 27478114 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt 
new file mode 100644 index 0000000000000000000000000000000000000000..621c22e6af48fa4eaafca025ed319ecbfbde033f --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a3b5bbd6005cf8a454406c74e465da2f9376b61ea1675752edc1c90cb1d6197 +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..083a4850017b724095cf73d24e8a205fc9ad17af --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d698e9b5bce693d879b13bb52b882d9fb04b903c38d9ff3761ef910c49b31c2 +size 27478434 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..58358e586fff6a9e9feb56eb696d6966c2636a41 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7dac8454ed8f1aa2a74d080dfc4601641548f2455521276881e7dc06a5b4001 +size 27478167 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4d1e9a759af56da4be90f54d6396ae443da8245 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:408eeec1835bc99a3f66ac1986c9cf40cc4ca7ad6dffbd815dcde8c43a766c2e +size 27478242 diff --git 
a/146m14b100m/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce2e6ec6fe04da7bcbd0dcc7dd5a440d0163884e --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aae1c5356bba3a1c132a99b7645373d1f5fc0dc3380ed82084bce5852ad298d +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e99365d92eccf52c83d9c3f866b232d0b7628d4 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba31aa5a94d50002799ea09fc93221182c3974a420a3e15560359d034c550884 +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08fa9f3ef5832304f8cf08fcc7c1097e304c3bc2 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:423a1fafb6691ed4c039784ed3415732fd211f9bb944241c6b896c84a5f1106b +size 27478178 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d08f715db769ae70b748ce28354bacdca636147b --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8dddf3e1db4e910fb1de9743542d1434ba136b04c0e0d512883dbd55269653dd +size 27478434 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd1460832d27207abe3ea4e896740aa71db5ddbc --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de2a02d060aaa93d94130d97dd77ca65d741146b45f936019f89006c3c6d337b +size 27478050 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9f2b136a4e7086815f7d11659b647f81ed063d6 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b76957fcdedebbc46adcecb2a208716f545cfbdf9f3c5d8c89b0de71e8d52e3 +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d620c9c4f876c65bd9a6f7001aedcbb4970a442 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71b9714a04cbabbc0e282831fba47fa4a93519823f991c84973e48a474085b8f +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9efa5666de48b2bdb25f79919badb90b9af5c31b --- 
/dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea3dcd61822c5fc56771b240ecdaa28108c62069b9a810a15d0797a162c2741a +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..aeafe5f52c1518d3cdaf18f101fc8033ec0ebef3 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f87d36ecb94eabe816ec587a3a8bfd62d60736f401f72fbd731474d8a4b663a +size 27478370 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a38d361d32e183759bb591f77ccee0a1de4ca255 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eba94dc39f86dbf2b6b9f99ef24f7ad8a248fd364c4f803f7fdb4fbbc23ae664 +size 27478231 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..da384390b6deef6aef7dcebbfa3a3908aec6124e --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02dcd9627823ede520fa7af7b7335b6b81bf8460ff6b58b5b529ca9b42c52b3c +size 27478242 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..c57c50af1346dbaceeb3feee6a5bfda7f53c8ad3 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0566ac23cca25e0fdf2c865878ab350d8156f095f533235737a46778b4baa1a2 +size 27478242 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b241ee7b83edfceedc5c3910970dd5eed65121bd --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fabfd53ab4b1cf203341ee204ef80294d9e91a304209d3953d686a861fb6d0d3 +size 27478434 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4301a92868cb6dc32a6bf06c87124856e3389fd1 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4866852f63162b48af5e1d85b7beb8ac8181def6c089cf39d6da8851c8bf986 +size 27478178 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..239973773c2454e9e3922a6a815e42f1db32f03e --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a774ba769f521d6f096545e57c606d4d795021064b95f6e5828f9ecc1cbffe5 +size 27478306 diff --git 
a/146m14b100m/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d3c0b900f7c7e3ef9be6d09d861fb5d3995cf1c --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81f92cadb2f74805c2b5f30a74a5da5f21f644e20e4bdc98811a2b22a36d7827 +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4a71368570e5b886fc1f9a90f2aee6e24dedc56 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f41a225633d6285febe469b6fd8aa5f2999fb0ef71c9abb932372edf589cbac3 +size 27478306 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ec1877ca1a6d60c160760e3ec2bc5d600a995b3 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf5d4a8f6d4d919befe6a35c6ed9336aa06da99c73e6289f360f64578866dbb +size 27478370 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22134b03deae8c50dd7c2989e73fbb07628ddc0a --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:32a6aeb3375df24a5afc50a428690fdf7895a9b5845f38dfeaa994f7f3eefe21 +size 27478242 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8345ec57ff3366aa820dfe5795dd39b65de12e0e --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e78a7b7bbbc670c8e2014824b6b843097138962e0ab383726f65067183214cc1 +size 27478370 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c6aa74636c80079da1e60fbd844991598be2b24 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc954811c2587d64cfa343b2f73daeecfa5afdc3ca909964f26dc9da382cc82 +size 27478167 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..97f737eec9390e07e2f892d9867bdf6eb1243080 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef27aed481d046040a27aba8c7775acc5c0d5f02f222db3dd04f52cb07648e74 +size 27478178 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..13f3c1ced155697217e0f2aa8587f2af2cb5fd8a --- 
/dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b54c4060b1f82940374f33944896938b9e12038b9bca0d51294d6b7ecbbae00 +size 27478370 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..896ac886170d4e4aa36ddd0c43114df2c42803e1 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd7991f6ab02b7401cae95818158c13f51b9722270369388faa149e315b81eeb +size 27478178 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5336a5f20439dfd6590446cf18fda1272686a37 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd87b1688fdcbb9e9c5ba56401d39b7c74ff756d2b689ab766fbce5d3ffd713 +size 27478242 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c91242a5ca076c087e21b2d825d5dc0021f0ff2a --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ddb6ff8fb23d4dd9c2522b8ea03c7bf41eac641dd119db8c44da7895d959615 +size 27478359 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..f18b6c92f6eb242543e411ff62b491e938ce9665 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3bb2847d1f6e805650b395e331cef6a29adc6b62a2194de7b68da1613e8db49 +size 27478103 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ef33b7374d20a658238e24e310cdd4ebb3440b9 --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8f5cbd4ad881068f814ecae65e28baa56e83afc51f21407f587e488c0cdda9d +size 27478359 diff --git a/146m14b100m/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/146m14b100m/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e7e1eafef7d7cdf8ec06b0a41b148f003849fbf --- /dev/null +++ b/146m14b100m/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3d3d5574f07b27278eb548a64d911b5bd6df3b37283a2fdeb865dfe4e82a5b +size 27478167 diff --git a/146m14b100m/global_step21553/layer_01-model_00-model_states.pt b/146m14b100m/global_step21553/layer_01-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa308f020bd41e8d25fb8577c7069a4b82d91331 --- /dev/null +++ b/146m14b100m/global_step21553/layer_01-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:043a4ad50dec98cb4475a9b8ceef40ba8d06075cf0fbfb31fd40a8419764a4f5 +size 80413955 diff --git a/146m14b100m/global_step21553/layer_03-model_00-model_states.pt 
b/146m14b100m/global_step21553/layer_03-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d25dc14dbb3755fe9cc92f5c9fbfd3ddc2514697 --- /dev/null +++ b/146m14b100m/global_step21553/layer_03-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92f44325d25e1fb2776dde54b0de18eee714633f445a089b4efe663a282ec946 +size 14180099 diff --git a/146m14b100m/global_step21553/layer_04-model_00-model_states.pt b/146m14b100m/global_step21553/layer_04-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..368a33d6db7cbf09d96e98922f9c0a85da21dfd3 --- /dev/null +++ b/146m14b100m/global_step21553/layer_04-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7826e62390102df675b8fc8a8f51e641235d95f07f286ef6d318d07f0ebd5d +size 14180099 diff --git a/146m14b100m/global_step21553/layer_05-model_00-model_states.pt b/146m14b100m/global_step21553/layer_05-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..15c2d54ee854146457b99a74a645f9d96c18ae78 --- /dev/null +++ b/146m14b100m/global_step21553/layer_05-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892f9e2480e1d62f3d090b6a6cb7ec123e4ac4579eadffee6529e927f6583cef +size 14180099 diff --git a/146m14b100m/global_step21553/layer_06-model_00-model_states.pt b/146m14b100m/global_step21553/layer_06-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e470a37d78e9580eb8d70ecf1c4fb587e471f1ca --- /dev/null +++ b/146m14b100m/global_step21553/layer_06-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c3669cd4fb2ae7cd5620f5c752d32d9d1b1e292134ff10b5ae31b902f00a147 +size 14180099 diff --git a/146m14b100m/global_step21553/layer_07-model_00-model_states.pt 
b/146m14b100m/global_step21553/layer_07-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..594e1b9c49e77e341071cd8788dd08b7e32ea1e4 --- /dev/null +++ b/146m14b100m/global_step21553/layer_07-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:532cf6ba150515814e956ec42cdb9156f908e87794291419a26d6d0f63adf908 +size 14180099 diff --git a/146m14b100m/global_step21553/layer_08-model_00-model_states.pt b/146m14b100m/global_step21553/layer_08-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ee7d4913f173f9a96bf9df9ac51cb88605dae87 --- /dev/null +++ b/146m14b100m/global_step21553/layer_08-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25b5b8ae30926fa14f51b6fe74ca9e0458a10ae1b7628b1ad06b029dc8b41227 +size 14180099 diff --git a/146m14b100m/global_step21553/layer_09-model_00-model_states.pt b/146m14b100m/global_step21553/layer_09-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c59ae680ddb6d3557029778916a3fd5249cea117 --- /dev/null +++ b/146m14b100m/global_step21553/layer_09-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0926948df1e13d7bc5aebbdb3ad4b1a1fc09c7899398d9de6b47fccc02afd156 +size 14180099 diff --git a/146m14b100m/global_step21553/layer_10-model_00-model_states.pt b/146m14b100m/global_step21553/layer_10-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..418bb84320871867abf88caaac66dae8135ea6b2 --- /dev/null +++ b/146m14b100m/global_step21553/layer_10-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f54729fd5f09281fe0aefdbcf4245e6bb2e9dcf9579b8c8d3650344dbe010c6 +size 14180099 diff --git a/146m14b100m/global_step21553/layer_11-model_00-model_states.pt 
b/146m14b100m/global_step21553/layer_11-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a10ca7c869f34354c4fa7951b2871600263e9cf --- /dev/null +++ b/146m14b100m/global_step21553/layer_11-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1406b1292b5e3973d762267112afb52acd078d32ac88f5b06f65ccf797ec966b +size 14180099 diff --git a/146m14b100m/global_step21553/layer_12-model_00-model_states.pt b/146m14b100m/global_step21553/layer_12-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d18cd1a72867fa396cee95084c682ff5a7cf22a0 --- /dev/null +++ b/146m14b100m/global_step21553/layer_12-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b4e51edb1c3872ec330771e090a7edf69a82fe10fabefe74b5803125cad2b3f +size 14180099 diff --git a/146m14b100m/global_step21553/layer_13-model_00-model_states.pt b/146m14b100m/global_step21553/layer_13-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..eec105e64f851c99b81121d78484135814cab10a --- /dev/null +++ b/146m14b100m/global_step21553/layer_13-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4457708ea1131f7756a6d9f2b6ffb6432da84b943457dfdc6b17c039b4471a9 +size 14180099 diff --git a/146m14b100m/global_step21553/layer_14-model_00-model_states.pt b/146m14b100m/global_step21553/layer_14-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..41d48bdb3d980da0e23d8c0542b1dc45686e3213 --- /dev/null +++ b/146m14b100m/global_step21553/layer_14-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d695ffa89a4d7b98358d00f521cc0a070999172f427ae9559d3cd4a48221022 +size 14180099 diff --git a/146m14b100m/global_step21553/layer_15-model_00-model_states.pt 
b/146m14b100m/global_step21553/layer_15-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbc7f573ae05e7c8400ba53245123fcc47e7f2d9 --- /dev/null +++ b/146m14b100m/global_step21553/layer_15-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d072e848cfd44f91b3ed5dcf8553ccd75e82f5a6374fdfa7e65f07b0cf4da5d2 +size 14180099 diff --git a/146m14b100m/global_step21553/layer_16-model_00-model_states.pt b/146m14b100m/global_step21553/layer_16-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee7057d9090c2c5b1f71c33825d3eee4a24adba7 --- /dev/null +++ b/146m14b100m/global_step21553/layer_16-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a09eb102a0a905d825704fd42632566cbf8078d4353571b5299ef921ffedd7 +size 14180099 diff --git a/146m14b100m/global_step21553/layer_17-model_00-model_states.pt b/146m14b100m/global_step21553/layer_17-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22747091a999a45cb2a1839aeb28df63cbe8d8b4 --- /dev/null +++ b/146m14b100m/global_step21553/layer_17-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8da265be7a5aa82e5b2589f5a09d0cc4ad9d3e82c6ae5b33a3cea62e028dfda +size 14180099 diff --git a/146m14b100m/global_step21553/layer_19-model_00-model_states.pt b/146m14b100m/global_step21553/layer_19-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..26c90e5ced48a98ef2631dabe8e9236a928a47aa --- /dev/null +++ b/146m14b100m/global_step21553/layer_19-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5e00439d84a1f9b6eaeb119a0ba182f204630babec8c03d6712c8a454fc55ec +size 4291 diff --git a/146m14b100m/global_step21553/mp_rank_00_model_states.pt b/146m14b100m/global_step21553/mp_rank_00_model_states.pt new file 
mode 100644 index 0000000000000000000000000000000000000000..eec52744c5404fc1e34782a6dfb645b4a3130ffc --- /dev/null +++ b/146m14b100m/global_step21553/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:362be02f15cb9a7fba71e66eceb8ec0e88036575a3b50f4e13df51f8925a39d2 +size 35443 diff --git a/146m14b100m/logs/3301357.err b/146m14b100m/logs/3301357.err new file mode 100644 index 0000000000000000000000000000000000000000..cd73a79ac79181f2ff165ff53b9245e05be09e2f --- /dev/null +++ b/146m14b100m/logs/3301357.err @@ -0,0 +1,1124 @@ +3: 2023-03-13 23:16:58.218077: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-03-13 23:16:58.218089: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-03-13 23:16:58.218085: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-03-13 23:16:58.218084: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+3: 2023-03-13 23:16:58.218086: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-03-13 23:16:58.218094: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-03-13 23:16:58.218093: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-03-13 23:16:58.218092: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-03-13 23:16:58.231937: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+4: 2023-03-13 23:16:58.231937: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-03-13 23:16:58.231945: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-03-13 23:16:58.231947: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-03-13 23:16:58.231933: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-03-13 23:16:58.231944: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+4: 2023-03-13 23:16:58.231950: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-03-13 23:16:58.231944: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-13 23:16:58.232464: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-13 23:16:58.232474: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-13 23:16:58.232464: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+6: 2023-03-13 23:16:58.232478: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-13 23:16:58.232481: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-13 23:16:58.232479: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-13 23:16:58.232464: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-13 23:16:58.232473: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+0: 2023-03-13 23:16:58.284740: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-03-13 23:16:58.284744: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-03-13 23:16:58.284752: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-03-13 23:16:58.284750: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-03-13 23:16:58.284757: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+0: 2023-03-13 23:16:58.284755: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-03-13 23:16:58.284757: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-03-13 23:16:58.284758: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-03-13 23:16:58.285002: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-03-13 23:16:58.284999: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-03-13 23:16:58.285012: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-03-13 23:16:58.285002: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-03-13 23:16:58.284996: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-03-13 23:16:58.285006: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-03-13 23:16:58.285014: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-03-13 23:16:58.285014: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-03-13 23:16:58.353060: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-03-13 23:16:58.353058: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-03-13 23:16:58.353056: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-03-13 23:16:58.353061: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+2: 2023-03-13 23:16:58.353058: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-03-13 23:16:58.353062: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-03-13 23:16:58.353061: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-03-13 23:16:58.353056: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-13 23:16:58.361140: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+5: 2023-03-13 23:16:58.361145: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-13 23:16:58.361151: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-13 23:16:58.361165: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-13 23:16:58.361163: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-13 23:16:58.361175: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+5: 2023-03-13 23:16:58.361169: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-13 23:16:58.361171: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-13 23:16:58.375619: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-13 23:16:58.375610: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-13 23:16:58.375620: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+1: 2023-03-13 23:16:58.375614: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-13 23:16:58.375616: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-13 23:16:58.375627: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-13 23:16:58.375628: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-13 23:16:58.375634: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+4: 2023-03-13 23:16:59.886726: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:16:59.886729: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:16:59.886732: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:16:59.886738: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:16:59.886735: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:16:59.886741: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:16:59.886740: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:16:59.886737: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:16:59.887086: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-03-13 23:16:59.887090: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-03-13 23:16:59.887092: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-03-13 23:16:59.887096: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-03-13 23:16:59.887098: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+4: 2023-03-13 23:16:59.887098: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-03-13 23:16:59.887096: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-03-13 23:16:59.887102: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-03-13 23:16:59.922580: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:16:59.922585: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:16:59.922583: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:16:59.922577: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:16:59.922592: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:16:59.922590: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:16:59.922594: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:16:59.922785: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-03-13 23:16:59.922791: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+3: 2023-03-13 23:16:59.922591: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:16:59.922796: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-03-13 23:16:59.922797: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-03-13 23:16:59.922799: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-03-13 23:16:59.922801: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+3: 2023-03-13 23:16:59.922804: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-03-13 23:16:59.922809: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-03-13 23:16:59.924877: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:16:59.924880: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:16:59.924880: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:16:59.924888: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:16:59.924888: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:16:59.924885: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:16:59.924885: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:16:59.924890: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:16:59.925077: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-03-13 23:16:59.925078: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-03-13 23:16:59.925083: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-03-13 23:16:59.925083: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-03-13 23:16:59.925085: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+6: 2023-03-13 23:16:59.925085: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-03-13 23:16:59.925089: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-03-13 23:16:59.925095: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-03-13 23:16:59.926251: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:16:59.926258: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:16:59.926253: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:16:59.926260: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:16:59.926260: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:16:59.926266: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:16:59.926265: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:16:59.926275: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:16:59.926436: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-03-13 23:16:59.926438: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-03-13 23:16:59.926443: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-03-13 23:16:59.926443: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-03-13 23:16:59.926444: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-03-13 23:16:59.926448: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-03-13 23:16:59.926451: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-03-13 23:16:59.926450: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-03-13 23:16:59.950447: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:16:59.950449: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:16:59.950455: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:16:59.950456: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:16:59.950461: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:16:59.950458: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:16:59.950457: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:16:59.950456: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:16:59.950635: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-03-13 23:16:59.950637: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-03-13 23:16:59.950637: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-03-13 23:16:59.950641: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-03-13 23:16:59.950643: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+7: 2023-03-13 23:16:59.950642: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-03-13 23:16:59.950647: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-03-13 23:16:59.950648: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-13 23:17:00.033133: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:00.033135: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:00.033141: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:00.033142: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:00.033149: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:00.033150: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:00.033142: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:00.033144: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:00.033334: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-13 23:17:00.033336: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-13 23:17:00.033338: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-13 23:17:00.033340: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-13 23:17:00.033342: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+2: 2023-03-13 23:17:00.033343: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-13 23:17:00.033344: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-13 23:17:00.033344: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-13 23:17:00.035897: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:00.035904: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:00.035905: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:00.035913: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:00.035910: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:00.035909: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:00.035908: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:00.035903: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:00.036240: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-13 23:17:00.036239: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-13 23:17:00.036244: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-13 23:17:00.036245: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-13 23:17:00.036247: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+5: 2023-03-13 23:17:00.036249: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-13 23:17:00.036249: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-13 23:17:00.036251: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-03-13 23:17:00.037830: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:00.037835: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:00.037840: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:00.037833: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:00.037838: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:00.037844: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:00.037846: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:00.037840: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:00.038181: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-03-13 23:17:00.038183: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-03-13 23:17:00.038185: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-03-13 23:17:00.038187: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-03-13 23:17:00.038189: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+1: 2023-03-13 23:17:00.038191: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-03-13 23:17:00.038192: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-03-13 23:17:00.038196: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-03-13 23:17:05.820395: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.820400: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No 
such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.820403: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.820407: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.820409: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.820408: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.820415: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.820418: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.820727: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-13 23:17:05.820769: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.820734: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.820736: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-13 23:17:05.820772: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.820732: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-13 23:17:05.820778: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.820838: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 2023-03-13 23:17:05.820740: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-13 23:17:05.820781: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.820740: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-13 23:17:05.820780: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.820843: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 2023-03-13 23:17:05.820746: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-13 23:17:05.820782: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.820850: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 2023-03-13 23:17:05.820749: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-13 23:17:05.820779: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.820848: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-13 23:17:05.820785: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.820857: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.820853: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.820860: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.820865: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.821245: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.821256: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.821256: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.821251: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.821259: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.821264: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.821265: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.821260: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.822333: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.822335: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.822338: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.822338: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.822339: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.822343: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.822347: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+7: 2023-03-13 23:17:05.822339: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.822342: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-13 23:17:05.822347: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-13 23:17:05.822352: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-13 23:17:05.822355: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-13 23:17:05.822360: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-13 23:17:05.822359: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-13 23:17:05.822361: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-13 23:17:05.822362: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-13 23:17:05.822732: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.822732: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.822735: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.822731: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.822732: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.822738: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.822739: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.822751: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-13 23:17:05.822753: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-13 23:17:05.822753: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-13 23:17:05.822755: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-13 23:17:05.822756: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-13 23:17:05.822755: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-13 23:17:05.822758: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-13 23:17:05.822761: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-13 23:17:05.822775: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-03-13 23:17:05.823059: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:17:05.823061: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:17:05.823063: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:17:05.823063: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:17:05.823069: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:17:05.823079: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-03-13 23:17:05.823080: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+6: 2023-03-13 23:17:05.823072: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:17:05.823072: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-03-13 23:17:05.823230: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:17:05.823087: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-03-13 23:17:05.823088: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-03-13 23:17:05.823087: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:17:05.823088: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-03-13 23:17:05.823091: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-13 23:17:05.823091: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+3: 2023-03-13 23:17:05.823230: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-03-13 23:17:05.823101: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.823230: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.823234: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.823232: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.823232: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.823246: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-13 23:17:05.823247: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-13 23:17:05.823247: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-13 23:17:05.823250: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-13 23:17:05.823252: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-13 23:17:05.823252: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-13 23:17:05.823268: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.823271: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-13 23:17:05.823283: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-13 23:17:05.823284: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+1: 2023-03-13 23:17:05.832490: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.832486: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.832492: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.832494: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.832499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.832502: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.832497: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.832501: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.834333: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.834333: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.834335: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.834338: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.834340: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.834341: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.834350: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-03-13 23:17:05.834351: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-03-13 23:17:05.834350: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-03-13 23:17:05.834350: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-03-13 23:17:05.834354: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-03-13 23:17:05.834356: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-03-13 23:17:05.834361: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.834362: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-13 23:17:05.834375: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-03-13 23:17:05.834375: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: 2023-03-13 23:17:05.836726: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.836733: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.836731: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.836735: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.836742: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.836741: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.836738: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.836738: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.838971: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.838973: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.838972: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.838975: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.838978: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.838976: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.838985: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: 2023-03-13 23:17:05.838980: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.838989: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-03-13 23:17:05.838989: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: 2023-03-13 23:17:05.838987: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-13 23:17:05.838993: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-03-13 23:17:05.838992: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-03-13 23:17:05.838993: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-03-13 23:17:05.838996: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-03-13 23:17:05.839002: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-13 23:17:05.844680: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.844692: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.844697: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.844703: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.844695: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.844698: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.844699: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.844715: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.846418: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.846431: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+2: 2023-03-13 23:17:05.846427: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.846426: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.846430: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.846431: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.846439: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.846433: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-03-13 23:17:05.822955: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.822956: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.822958: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.822959: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.822961: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.822963: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.822970: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-13 23:17:05.822971: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-13 23:17:05.822971: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-13 23:17:05.822974: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-13 23:17:05.822977: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-13 23:17:05.822975: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-13 23:17:05.822995: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.822997: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-13 23:17:05.823009: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-13 23:17:05.823010: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.846436: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-13 23:17:05.846449: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-13 23:17:05.846450: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-13 23:17:05.846448: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-13 23:17:05.846451: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-13 23:17:05.846453: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-13 23:17:05.846454: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-13 23:17:05.846456: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_upper_triang_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_upper_triang_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module fused_mix_prec_layer_norm_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module fused_mix_prec_layer_norm_cuda... +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +7: Successfully preprocessed all matching files. +7: Successfully preprocessed all matching files. +7: Successfully preprocessed all matching files. +2: Successfully preprocessed all matching files. 
+7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +6: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +2: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +5: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +2: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +0: Building extension module utils... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+1: +1: +1: +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: +3: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+5: +5: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +2: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +2: Building extension module utils... +2: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +2: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... 
+1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +7: +7: +7: Loading extension module utils...Loading extension module utils...Loading extension module utils... +7: +7: +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... 
+3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +4: No modifications detected for re-loaded extension module utils, skipping build step... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +6: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +6: +6: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +1: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... 
+6: No modifications detected for re-loaded extension module utils, skipping build step... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +1: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +4: +4: Loading extension module utils...Loading extension module utils... +4: +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +5: +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... 
+3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +3: +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... 
+0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings +0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/146m14b100m/logs/3301357.out b/146m14b100m/logs/3301357.out new file mode 100644 index 0000000000000000000000000000000000000000..af29a63151222cf36593f382e237490a92029e92 --- /dev/null +++ b/146m14b100m/logs/3301357.out @@ -0,0 +1,9645 @@ +Model parameters: d_model 768 ffw_size 3072 kv_size 64 n_heads 12 n_layers 15 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 1 --pipeline-model-parallel-size 1 --num-layers 15 --hidden-size 768 --num-attention-heads 12 --kv-channels 64 --ffn-hidden-size 3072 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 4 --global-batch-size 256 --train-samples 5_517_578 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --clip-grad 1.0 --kill-switch-path kill-switch-146m14b100m --bf16 --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 5_517_578 --lr-warmup-samples 55_176 --clip-grad 1.0 --weight-decay 1e-1 --log-interval 10 --save-interval 1000 --eval-interval 1000 --eval-iters 1 --tensorboard-dir tensorboard_146m14b100m --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save checkpoints_146m14b100m --load checkpoints_146m14b100m --train-weighted-split-paths-path train100m.txt --valid-weighted-split-paths-path val.txt --data-impl mmap --deepspeed --deepspeed_config ds_configs/3301357.json --zero-stage 0 +START 3301357: Mon 13 Mar 2023 11:16:38 PM EET +0: +0: +0: ======================= ROCm System Management Interface 
======================= +0: ================================= Concise Info ================================= +0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +0: 0 45.0c 98.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 1 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 2 34.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 4 42.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 6 42.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: ================================================================================ +0: ============================= End of ROCm SMI Log ============================== +4: +4: +4: ======================= ROCm System Management Interface ======================= +4: ================================= Concise Info ================================= +4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +4: 0 45.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 1 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 2 42.0c 79.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 4 42.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 6 43.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: ================================================================================ +4: ============================= End of ROCm SMI Log ============================== +6: +6: +6: ======================= ROCm System Management Interface ======================= +6: ================================= Concise Info ================================= +6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +6: 0 44.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 2 41.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 3 42.0c N/A 800Mhz 1600Mhz 0% 
auto 0.0W 0% 0% +6: 4 41.0c 168.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 6 39.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: ================================================================================ +6: ============================= End of ROCm SMI Log ============================== +2: +2: +2: ======================= ROCm System Management Interface ======================= +2: ================================= Concise Info ================================= +2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +2: 0 43.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 2 39.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 4 35.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 6 41.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: ================================================================================ +2: ============================= End of ROCm SMI Log ============================== +1: +1: +1: ======================= ROCm System Management Interface ======================= +1: ================================= Concise Info ================================= +1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +1: 0 44.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 2 38.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 4 42.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 6 38.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: ================================================================================ +1: ============================= End of ROCm 
SMI Log ============================== +5: +5: +5: ======================= ROCm System Management Interface ======================= +5: ================================= Concise Info ================================= +5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +5: 0 47.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 1 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 2 36.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 3 52.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 4 44.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 6 39.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: ================================================================================ +5: ============================= End of ROCm SMI Log ============================== +7: +7: +7: ======================= ROCm System Management Interface ======================= +7: ================================= Concise Info ================================= +7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +7: 0 49.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 2 36.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 3 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 4 38.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 6 43.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 7 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: ================================================================================ +7: ============================= End of ROCm SMI Log ============================== +3: +3: +3: ======================= ROCm System Management Interface ======================= +3: ================================= Concise Info ================================= +3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +3: 0 49.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 1 48.0c N/A 800Mhz 
1600Mhz 0% auto 0.0W 0% 0% +3: 2 41.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 4 42.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 5 39.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 6 39.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: ================================================================================ +3: ============================= End of ROCm SMI Log ============================== +7: Launching on nid005907 (7/8), master nid005900 port 9999, GPUs 8, CUDA: True +6: Launching on nid005906 (6/8), master nid005900 port 9999, GPUs 8, CUDA: True +1: Launching on nid005901 (1/8), master nid005900 port 9999, GPUs 8, CUDA: True +5: Launching on nid005905 (5/8), master nid005900 port 9999, GPUs 8, CUDA: True +4: Launching on nid005904 (4/8), master nid005900 port 9999, GPUs 8, CUDA: True +2: Launching on nid005902 (2/8), master nid005900 port 9999, GPUs 8, CUDA: True +3: Launching on nid005903 (3/8), master nid005900 port 9999, GPUs 8, CUDA: True +0: Launching on nid005900 (0/8), master nid005900 port 9999, GPUs 8, CUDA: True +0: using world size: 64, data-parallel-size: 64, tensor-model-parallel size: 1, pipeline-model-parallel size: 1 +0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. +0: using torch.bfloat16 for parameters ... +0: ------------------------ arguments ------------------------ +0: abort_on_unmet_fused_kernel_constraints ......... False +0: accumulate_allreduce_grads_in_fp32 .............. True +0: adam_beta1 ...................................... 0.9 +0: adam_beta2 ...................................... 0.999 +0: adam_eps ........................................ 1e-08 +0: adlr_autoresume ................................. False +0: adlr_autoresume_interval ........................ 1000 +0: apply_query_key_layer_scaling ................... True +0: apply_residual_connection_post_layernorm ........ 
False +0: attention_dropout ............................... 0.1 +0: attention_softmax_in_fp32 ....................... False +0: bert_binary_head ................................ True +0: bert_load ....................................... None +0: bf16 ............................................ True +0: bias_dropout_fusion ............................. True +0: bias_gelu_fusion ................................ True +0: biencoder_projection_dim ........................ 0 +0: biencoder_shared_query_context_model ............ False +0: block_data_path ................................. None +0: checkpoint_activations .......................... False +0: checkpoint_in_cpu ............................... False +0: checkpoint_num_layers ........................... 1 +0: clip_grad ....................................... 1.0 +0: codecarbon_dir .................................. None +0: consumed_train_samples .......................... 0 +0: consumed_train_tokens ........................... 0 +0: consumed_valid_samples .......................... 0 +0: contigious_checkpointing ........................ False +0: cpu_optimizer ................................... False +0: cpu_torch_adam .................................. False +0: curriculum_learning ............................. False +0: data_impl ....................................... mmap +0: data_parallel_size .............................. 64 +0: data_path ....................................... None +0: dataloader_type ................................. single +0: DDP_impl ........................................ local +0: decoder_seq_length .............................. None +0: deepscale ....................................... False +0: deepscale_config ................................ None +0: deepspeed ....................................... True +0: deepspeed_activation_checkpointing .............. False +0: deepspeed_config ................................ 
ds_configs/3301357.json +0: deepspeed_mpi ................................... False +0: distribute_checkpointed_activations ............. False +0: distributed_backend ............................. nccl +0: embed_layernorm ................................. False +0: embedding_path .................................. None +0: encoder_seq_length .............................. 2048 +0: eod_mask_loss ................................... False +0: eval_interval ................................... 1000 +0: eval_iters ...................................... 1 +0: eval_only ....................................... None +0: evidence_data_path .............................. None +0: exit_duration_in_mins ........................... None +0: exit_interval ................................... None +0: ffn_hidden_size ................................. 3072 +0: finetune ........................................ False +0: fp16 ............................................ False +0: fp16_lm_cross_entropy ........................... False +0: fp32_residual_connection ........................ False +0: gigaflos_no_embeds .............................. 0 +0: global_batch_size ............................... 256 +0: glu_activation .................................. None +0: hidden_dropout .................................. 0.1 +0: hidden_size ..................................... 768 +0: hysteresis ...................................... 2 +0: ict_head_size ................................... None +0: ict_load ........................................ None +0: img_dim ......................................... 224 +0: indexer_batch_size .............................. 128 +0: indexer_log_interval ............................ 1000 +0: inference ....................................... False +0: init_method_std ................................. 0.02 +0: init_method_xavier_uniform ...................... False +0: initial_loss_scale .............................. 
4294967296 +0: kill_switch_path ................................ kill-switch-146m14b100m +0: kv_channels ..................................... 64 +0: layer_norm_fusion ............................... True +0: layernorm_epsilon ............................... 1e-05 +0: lazy_mpu_init ................................... None +0: load ............................................ checkpoints_146m14b100m +0: local_rank ...................................... None +0: log_batch_size_to_tensorboard ................... True +0: log_interval .................................... 10 +0: log_learning_rate_to_tensorboard ................ True +0: log_level ....................................... None +0: log_level_replica ............................... None +0: log_loss_scale_to_tensorboard ................... True +0: log_num_zeros_in_grad ........................... False +0: log_params_norm ................................. False +0: log_path ........................................ None +0: log_timers_to_tensorboard ....................... True +0: log_validation_ppl_to_tensorboard ............... True +0: loss_on_targets_only ............................ False +0: loss_scale ...................................... None +0: loss_scale_window ............................... 1000 +0: lr .............................................. 0.0002 +0: lr_decay_iters .................................. None +0: lr_decay_samples ................................ 5517578 +0: lr_decay_style .................................. cosine +0: lr_decay_tokens ................................. None +0: lr_warmup_fraction .............................. None +0: lr_warmup_iters ................................. 0 +0: lr_warmup_samples ............................... 55176 +0: make_vocab_size_divisible_by .................... 128 +0: mask_prob ....................................... 0.15 +0: masked_softmax_fusion ........................... True +0: max_position_embeddings ......................... 
2048 +0: mean_noise_span_length .......................... None +0: memory_centric_tiled_linear ..................... False +0: merge_file ...................................... gpt2/merges.txt +0: micro_batch_size ................................ 4 +0: min_loss_scale .................................. 1.0 +0: min_lr .......................................... 2e-05 +0: mmap_warmup ..................................... False +0: no_load_optim ................................... None +0: no_load_rng ..................................... None +0: no_save_optim ................................... None +0: no_save_rng ..................................... None +0: noise_density ................................... None +0: num_attention_heads ............................. 12 +0: num_channels .................................... 3 +0: num_classes ..................................... 1000 +0: num_layers ...................................... 15 +0: num_layers_per_virtual_pipeline_stage ........... None +0: num_workers ..................................... 2 +0: onnx_safe ....................................... None +0: openai_gelu ..................................... False +0: optimizer ....................................... adam +0: optimizer_fusion ................................ True +0: override_lr_scheduler ........................... False +0: pad_vocab_size_to ............................... None +0: params_dtype .................................... torch.bfloat16 +0: partition_activations ........................... False +0: patch_dim ....................................... 16 +0: pipeline_model_parallel_size .................... 1 +0: position_embedding_type ......................... PositionEmbeddingType.absolute +0: pp_partition_method ............................. None +0: profile_backward ................................ False +0: query_in_block_prob ............................. 0.1 +0: rampup_batch_size ............................... 
None +0: rank ............................................ 0 +0: remote_device ................................... none +0: reset_attention_mask ............................ False +0: reset_position_ids .............................. False +0: reset_progress .................................. None +0: retriever_report_topk_accuracies ................ [] +0: retriever_score_scaling ......................... False +0: retriever_seq_length ............................ 256 +0: reweight_loss_based_on_position_frequency ....... False +0: sample_rate ..................................... 1.0 +0: save ............................................ checkpoints_146m14b100m +0: save_interval ................................... 1000 +0: scatter_gather_tensors_in_pipeline .............. True +0: scattered_embeddings ............................ False +0: seed ............................................ 1234 +0: seq_length ...................................... 2048 +0: sgd_momentum .................................... 0.9 +0: short_seq_prob .................................. 0.1 +0: skip_train_iteration_range ...................... None +0: split ........................................... None +0: split_transformers .............................. False +0: sync_tp_duplicated_parameters ................... False +0: synchronize_each_layer .......................... False +0: tensor_model_parallel_size ...................... 1 +0: tensorboard_dir ................................. tensorboard_146m14b100m +0: tensorboard_log_interval ........................ 1 +0: tensorboard_queue_size .......................... 5 +0: test_weighted_split_paths ....................... None +0: test_weighted_split_paths_path .................. None +0: tile_factor ..................................... 1 +0: titles_data_path ................................ None +0: tokenizer_name_or_path .......................... None +0: tokenizer_type .................................. 
GPT2BPETokenizer +0: train_iters ..................................... None +0: train_samples ................................... 5517578 +0: train_tokens .................................... None +0: train_weighted_split_names ...................... ['train'] +0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_100M_text_document']] +0: train_weighted_split_paths_path ................. None +0: train_weighted_split_splits ..................... [['0:1']] +0: train_weighted_split_weights .................... [['1.0']] +0: universal_checkpoint ............................ False +0: use_bnb_optimizer ............................... False +0: use_checkpoint_lr_scheduler ..................... False +0: use_contiguous_buffers_in_ddp ................... True +0: use_cpu_initialization .......................... None +0: use_one_sent_docs ............................... False +0: use_pin_memory .................................. False +0: valid_num_workers ............................... 2 +0: valid_weighted_split_names ...................... ['validation'] +0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] +0: valid_weighted_split_paths_path ................. None +0: valid_weighted_split_splits ..................... [['0:1']] +0: valid_weighted_split_weights .................... [['1.0']] +0: virtual_pipeline_model_parallel_size ............ None +0: vocab_extra_ids ................................. 0 +0: vocab_file ...................................... gpt2/vocab.json +0: weight_decay .................................... 0.1 +0: world_size ...................................... 64 +0: zero_allgather_bucket_size ...................... 0.0 +0: zero_contigious_gradients ....................... False +0: zero_reduce_bucket_size ......................... 0.0 +0: zero_reduce_scatter ............................. 
False +0: zero_stage ...................................... 0 +0: -------------------- end of arguments --------------------- +0: setting number of micro-batches to constant 1 +0: > building GPT2BPETokenizer tokenizer ... +0: > padded vocab (size: 50257) with 47 dummy tokens (new size: 50304) +0: DeepSpeed general environment info: +0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] +0: torch version .................... 1.13.0+rocm5.2 +0: torch cuda version ............... None +0: torch hip version ................ 5.2.21151-afdc89f8 +0: nvcc version ..................... None +0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] +0: deepspeed info ................... 0.7.5, unknown, unknown +0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 +7: > setting tensorboard ... +0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** +0: > initializing torch distributed ... +0: [2023-03-13 23:17:32,336] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +0: > initializing tensor model parallel with size 1 +0: > initializing pipeline model parallel with size 1 +0: > setting random seeds to 1234 ... +0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 +0: > compiling dataset index builder ... +0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: make: Nothing to be done for 'default'. +0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: >>> done with dataset index builder. 
Compilation time: 0.114 seconds +0: > compiling and loading fused kernels ... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 87 +0: [1/1] c++ scaled_upper_triang_masked_softmax_hip.cuda.o scaled_upper_triang_masked_softmax_hip.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o scaled_upper_triang_masked_softmax_cuda.so +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 63 +0: [1/1] c++ scaled_masked_softmax_hip.o scaled_masked_softmax_hip.cuda.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o scaled_masked_softmax_cuda.so +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 67 +0: ninja: no work to do. 
+0: >>> done with compiling and loading fused kernels. Compilation time: 20.821 seconds +0: time to initialize megatron (seconds): 3.971 +0: [after megatron is initialized] datetime: 2023-03-13 23:17:55 +0: building GPT model ... +0: [2023-03-13 23:17:56,105] [INFO] [utils.py:827:see_memory_usage] Before Building Model +0: [2023-03-13 23:17:56,105] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB +0: [2023-03-13 23:17:56,106] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 34.17 GB, percent = 6.8% +0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None +0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=1, model=0): 1, ProcessCoord(pipe=0, data=2, model=0): 2, ProcessCoord(pipe=0, data=3, model=0): 3, ProcessCoord(pipe=0, data=4, model=0): 4, ProcessCoord(pipe=0, data=5, model=0): 5, ProcessCoord(pipe=0, data=6, model=0): 6, ProcessCoord(pipe=0, data=7, model=0): 7, ProcessCoord(pipe=0, data=8, model=0): 8, ProcessCoord(pipe=0, data=9, model=0): 9, ProcessCoord(pipe=0, data=10, model=0): 10, ProcessCoord(pipe=0, data=11, model=0): 11, ProcessCoord(pipe=0, data=12, model=0): 12, ProcessCoord(pipe=0, data=13, model=0): 13, ProcessCoord(pipe=0, data=14, model=0): 14, ProcessCoord(pipe=0, data=15, model=0): 15, ProcessCoord(pipe=0, data=16, model=0): 16, ProcessCoord(pipe=0, data=17, model=0): 17, ProcessCoord(pipe=0, data=18, model=0): 18, ProcessCoord(pipe=0, data=19, model=0): 19, ProcessCoord(pipe=0, data=20, model=0): 20, ProcessCoord(pipe=0, data=21, model=0): 21, ProcessCoord(pipe=0, data=22, model=0): 22, ProcessCoord(pi +0: pe=0, data=23, model=0): 23, ProcessCoord(pipe=0, data=24, model=0): 24, ProcessCoord(pipe=0, data=25, model=0): 25, ProcessCoord(pipe=0, data=26, model=0): 26, ProcessCoord(pipe=0, data=27, model=0): 27, ProcessCoord(pipe=0, data=28, model=0): 28, ProcessCoord(pipe=0, data=29, model=0): 29, ProcessCoord(pipe=0, data=30, model=0): 30, ProcessCoord(pipe=0, 
data=31, model=0): 31, ProcessCoord(pipe=0, data=32, model=0): 32, ProcessCoord(pipe=0, data=33, model=0): 33, ProcessCoord(pipe=0, data=34, model=0): 34, ProcessCoord(pipe=0, data=35, model=0): 35, ProcessCoord(pipe=0, data=36, model=0): 36, ProcessCoord(pipe=0, data=37, model=0): 37, ProcessCoord(pipe=0, data=38, model=0): 38, ProcessCoord(pipe=0, data=39, model=0): 39, ProcessCoord(pipe=0, data=40, model=0): 40, ProcessCoord(pipe=0, data=41, model=0): 41, ProcessCoord(pipe=0, data=42, model=0): 42, ProcessCoord(pipe=0, data=43, model=0): 43, ProcessCoord(pipe=0, data=44, model=0): 44, ProcessCoord(pipe=0, data=45, model=0): 45, ProcessCoord(pipe=0, data=4 +0: 6, model=0): 46, ProcessCoord(pipe=0, data=47, model=0): 47, ProcessCoord(pipe=0, data=48, model=0): 48, ProcessCoord(pipe=0, data=49, model=0): 49, ProcessCoord(pipe=0, data=50, model=0): 50, ProcessCoord(pipe=0, data=51, model=0): 51, ProcessCoord(pipe=0, data=52, model=0): 52, ProcessCoord(pipe=0, data=53, model=0): 53, ProcessCoord(pipe=0, data=54, model=0): 54, ProcessCoord(pipe=0, data=55, model=0): 55, ProcessCoord(pipe=0, data=56, model=0): 56, ProcessCoord(pipe=0, data=57, model=0): 57, ProcessCoord(pipe=0, data=58, model=0): 58, ProcessCoord(pipe=0, data=59, model=0): 59, ProcessCoord(pipe=0, data=60, model=0): 60, ProcessCoord(pipe=0, data=61, model=0): 61, ProcessCoord(pipe=0, data=62, model=0): 62, ProcessCoord(pipe=0, data=63, model=0): 63} +0: [2023-03-13 23:17:58,111] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer +0: stage=0 layers=22 +0: 0: _to_float16 +0: 1: EmbeddingPipe +0: 2: +0: 3: ParallelTransformerLayerPipe +0: 4: ParallelTransformerLayerPipe +0: 5: ParallelTransformerLayerPipe +0: 6: ParallelTransformerLayerPipe +0: 7: ParallelTransformerLayerPipe +0: 8: ParallelTransformerLayerPipe +0: 9: ParallelTransformerLayerPipe +0: 10: ParallelTransformerLayerPipe +0: 11: ParallelTransformerLayerPipe +0: 12: ParallelTransformerLayerPipe 
+0: 13: ParallelTransformerLayerPipe +0: 14: ParallelTransformerLayerPipe +0: 15: ParallelTransformerLayerPipe +0: 16: ParallelTransformerLayerPipe +0: 17: ParallelTransformerLayerPipe +0: 18: undo +0: 19: MixedFusedLayerNorm +0: 20: EmbeddingPipe +0: 21: float16_to_fp32 +0: loss: CrossEntropy +0: [2023-03-13 23:17:58,312] [INFO] [utils.py:827:see_memory_usage] After Building Model +0: [2023-03-13 23:17:58,313] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.28 GB CA 0.29 GB Max_CA 0 GB +0: [2023-03-13 23:17:58,313] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 34.19 GB, percent = 6.8% +0: setting training iterations to 21553 +0: > learning rate decay style: cosine +0: DeepSpeed is enabled. +0: [2023-03-13 23:17:58,315] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +0: [2023-03-13 23:18:11,057] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +0: [2023-03-13 23:18:11,057] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +0: [2023-03-13 23:18:11,058] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +0: [2023-03-13 23:18:11,063] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +0: [2023-03-13 23:18:11,063] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +0: [2023-03-13 23:18:11,183] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer +0: [2023-03-13 23:18:11,184] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.29 GB CA 0.31 GB Max_CA 0 GB +0: [2023-03-13 23:18:11,184] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 34.87 GB, percent = 6.9% +0: ninja: no work to do. +0: Time to load utils op: 0.2777984142303467 seconds +2: ninja: no work to do. 
+0: Time to load utils op: 0.0006694793701171875 seconds +2: Time to load utils op: 0.1482686996459961 seconds +0: Time to load utils op: 0.3033740520477295 seconds +0: Time to load utils op: 0.20250511169433594 seconds +0: Time to load utils op: 0.2026660442352295 seconds +0: Time to load utils op: 0.20228815078735352 seconds +0: Time to load utils op: 0.20250320434570312 seconds +0: Time to load utils op: 0.202439546585083 seconds +0: Time to load utils op: 0.2024822235107422 seconds +2: Time to load utils op: 0.20400571823120117 seconds +2: Time to load utils op: 0.20396780967712402 seconds +2: Time to load utils op: 0.20371127128601074 seconds +2: Time to load utils op: 0.2036445140838623 secondsTime to load utils op: 0.2037055492401123 seconds +2: +2: Time to load utils op: 0.20306849479675293 seconds +2: Time to load utils op: 0.20462727546691895 seconds +1: Time to load utils op: 0.21123743057250977 seconds +1: Time to load utils op: 0.2112736701965332 seconds +1: Time to load utils op: 0.2113029956817627 seconds +1: Time to load utils op: 0.21129703521728516 seconds +1: Time to load utils op: 0.21130585670471191 secondsTime to load utils op: 0.21130681037902832 secondsTime to load utils op: 0.21130037307739258 seconds +1: +1: +1: Time to load utils op: 0.21135425567626953 seconds +3: Time to load utils op: 0.2111496925354004 seconds +3: Time to load utils op: 0.2111496925354004 seconds +3: Time to load utils op: 0.2111673355102539 secondsTime to load utils op: 0.21117424964904785 seconds +3: +3: Time to load utils op: 0.21117925643920898 secondsTime to load utils op: 0.21121454238891602 seconds +3: +3: Time to load utils op: 0.2112116813659668 seconds +3: Time to load utils op: 0.21120619773864746 seconds +0: Time to load utils op: 0.0005738735198974609 seconds +0: Time to load utils op: 0.00041866302490234375 seconds +7: Time to load utils op: 0.21175122261047363 seconds +7: Time to load utils op: 0.21205449104309082 seconds +7: Time to load utils op: 
0.21157503128051758 seconds +7: Time to load utils op: 0.21082115173339844 seconds +7: Time to load utils op: 0.2112104892730713 secondsTime to load utils op: 0.211594820022583 seconds +7: Time to load utils op: 0.21143770217895508 seconds +7: +7: Time to load utils op: 0.21078157424926758 seconds +0: Time to load utils op: 0.00037026405334472656 seconds +4: Time to load utils op: 0.21120381355285645 seconds +4: Time to load utils op: 0.2112104892730713 seconds +4: Time to load utils op: 0.21122980117797852 seconds +4: Time to load utils op: 0.2112271785736084 seconds +4: Time to load utils op: 0.21123576164245605 seconds +4: Time to load utils op: 0.2112431526184082 seconds +4: Time to load utils op: 0.21123361587524414 seconds +4: Time to load utils op: 0.21114087104797363 seconds +0: Time to load utils op: 0.00038361549377441406 seconds +0: Time to load utils op: 0.00037980079650878906 seconds +0: Time to load utils op: 0.00037217140197753906 seconds +6: Time to load utils op: 0.21034502983093262 seconds +6: Time to load utils op: 0.21034812927246094 seconds +6: Time to load utils op: 0.21036148071289062 secondsTime to load utils op: 0.2103571891784668 seconds +6: +6: Time to load utils op: 0.21036839485168457 seconds +6: Time to load utils op: 0.21038389205932617 seconds +6: Time to load utils op: 0.21035385131835938 secondsTime to load utils op: 0.21036005020141602 seconds +6: +5: Time to load utils op: 0.2127225399017334 seconds +5: Time to load utils op: 0.2127375602722168 seconds +5: Time to load utils op: 0.21271848678588867 seconds +5: Time to load utils op: 0.2127528190612793 secondsTime to load utils op: 0.21275973320007324 seconds +5: Time to load utils op: 0.21272802352905273 seconds +5: Time to load utils op: 0.21273303031921387 secondsTime to load utils op: 0.21272683143615723 seconds +5: +5: +2: Time to load utils op: 0.0004801750183105469 seconds +2: Time to load utils op: 0.0004978179931640625 seconds +2: Time to load utils op: 
0.0005309581756591797 seconds +2: Time to load utils op: 0.0005092620849609375 seconds +2: Time to load utils op: 0.0005130767822265625 seconds +2: Time to load utils op: 0.0005481243133544922 seconds +2: Time to load utils op: 0.0005631446838378906 seconds +2: Time to load utils op: 0.00047516822814941406 seconds +7: Time to load utils op: 0.0009262561798095703 seconds +7: Time to load utils op: 0.0011336803436279297 seconds +7: Time to load utils op: 0.0010821819305419922 secondsTime to load utils op: 0.00115966796875 seconds +7: +7: Time to load utils op: 0.001154184341430664 seconds +7: Time to load utils op: 0.0011029243469238281 secondsTime to load utils op: 0.0011756420135498047 seconds +7: +7: Time to load utils op: 0.001004934310913086 seconds +6: Time to load utils op: 0.0005805492401123047 seconds +6: Time to load utils op: 0.0007092952728271484 seconds +4: Time to load utils op: 0.0008308887481689453 seconds +1: Time to load utils op: 0.0007848739624023438 seconds +6: Time to load utils op: 0.0008220672607421875 seconds +6: Time to load utils op: 0.0007600784301757812 seconds +1: Time to load utils op: 0.0009143352508544922 seconds +6: Time to load utils op: 0.0007531642913818359 seconds +5: Time to load utils op: 0.0008485317230224609 seconds +6: Time to load utils op: 0.0009601116180419922 secondsTime to load utils op: 0.0009686946868896484 seconds +6: +4: Time to load utils op: 0.0010194778442382812 secondsTime to load utils op: 0.001049041748046875 seconds +4: +4: Time to load utils op: 0.0010709762573242188 seconds +6: Time to load utils op: 0.0010094642639160156 seconds +5: Time to load utils op: 0.0009012222290039062 seconds +1: Time to load utils op: 0.0010426044464111328 seconds +4: Time to load utils op: 0.000990152359008789 seconds +1: Time to load utils op: 0.0010349750518798828 seconds +4: Time to load utils op: 0.0009140968322753906 seconds +1: Time to load utils op: 0.0010983943939208984 seconds +1: Time to load utils op: 
0.0010106563568115234 seconds +5: Time to load utils op: 0.0009765625 seconds +4: Time to load utils op: 0.0010380744934082031 secondsTime to load utils op: 0.0011036396026611328 seconds +4: +1: Time to load utils op: 0.0010590553283691406 seconds +1: Time to load utils op: 0.0011017322540283203 seconds +5: Time to load utils op: 0.001117706298828125 seconds +5: Time to load utils op: 0.0011334419250488281 seconds +5: Time to load utils op: 0.0010995864868164062 seconds +5: Time to load utils op: 0.0011200904846191406 seconds +5: Time to load utils op: 0.0011527538299560547 seconds +3: Time to load utils op: 0.0011968612670898438 seconds +3: Time to load utils op: 0.0015647411346435547 seconds +3: Time to load utils op: 0.0014767646789550781 seconds +3: Time to load utils op: 0.0015268325805664062 seconds +3: Time to load utils op: 0.0014972686767578125 seconds +3: Time to load utils op: 0.0014920234680175781 seconds +3: Time to load utils op: 0.0015294551849365234 seconds +3: Time to load utils op: 0.001592874526977539 seconds +0: [2023-03-13 23:18:11,608] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 +0: [2023-03-13 23:18:11,609] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.28 GB CA 0.31 GB Max_CA 0 GB +0: [2023-03-13 23:18:11,609] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.02 GB, percent = 7.0% +0: [2023-03-13 23:18:11,724] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 +0: [2023-03-13 23:18:11,725] [INFO] [utils.py:828:see_memory_usage] MA 0.62 GB Max_MA 0.62 GB CA 0.82 GB Max_CA 1 GB +0: [2023-03-13 23:18:11,725] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.02 GB, percent = 7.0% +0: [2023-03-13 23:18:11,829] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 +0: [2023-03-13 23:18:11,830] [INFO] [utils.py:828:see_memory_usage] MA 0.62 GB Max_MA 0.62 GB CA 0.82 GB Max_CA 1 GB +0: [2023-03-13 23:18:11,830] [INFO] [utils.py:836:see_memory_usage] 
CPU Virtual Memory: used = 35.02 GB, percent = 7.0% +0: [2023-03-13 23:18:11,934] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 +0: [2023-03-13 23:18:11,934] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-03-13 23:18:11,935] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.02 GB, percent = 7.0% +0: [2023-03-13 23:18:12,037] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 +0: [2023-03-13 23:18:12,037] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-03-13 23:18:12,037] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.02 GB, percent = 7.0% +0: [2023-03-13 23:18:12,141] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 +0: [2023-03-13 23:18:12,142] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-03-13 23:18:12,142] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.02 GB, percent = 7.0% +0: [2023-03-13 23:18:12,244] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer +0: [2023-03-13 23:18:12,244] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-03-13 23:18:12,245] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.02 GB, percent = 7.0% +0: [2023-03-13 23:18:12,352] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer +0: [2023-03-13 23:18:12,352] [INFO] [utils.py:828:see_memory_usage] MA 0.85 GB Max_MA 0.85 GB CA 1.13 GB Max_CA 1 GB +0: [2023-03-13 23:18:12,353] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.02 GB, percent = 7.0% +0: [2023-03-13 23:18:12,456] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer +0: [2023-03-13 23:18:12,456] [INFO] [utils.py:828:see_memory_usage] MA 0.85 GB Max_MA 0.85 GB CA 1.13 GB Max_CA 1 GB +0: [2023-03-13 23:18:12,456] [INFO] 
[utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.02 GB, percent = 7.0% +0: [2023-03-13 23:18:12,456] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +0: [2023-03-13 23:18:12,457] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler +0: [2023-03-13 23:18:12,457] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +0: [2023-03-13 23:18:12,457] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0, 0.0, 0.0], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +0: [2023-03-13 23:18:12,457] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: +0: [2023-03-13 23:18:12,457] [INFO] [config.py:1011:print] activation_checkpointing_config { +0: "partition_activations": false, +0: "contiguous_memory_optimization": false, +0: "cpu_checkpointing": false, +0: "number_checkpoints": null, +0: "synchronize_checkpoint_boundary": false, +0: "profile": false +0: } +0: [2023-03-13 23:18:12,457] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +0: [2023-03-13 23:18:12,457] [INFO] [config.py:1011:print] amp_enabled .................. False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] amp_params ................... False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] autotuning_config ............ 
{ +0: "enabled": false, +0: "start_step": null, +0: "end_step": null, +0: "metric_path": null, +0: "arg_mappings": null, +0: "metric": "throughput", +0: "model_info": null, +0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", +0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", +0: "overwrite": true, +0: "fast": true, +0: "start_profile_step": 3, +0: "end_profile_step": 5, +0: "tuner_type": "gridsearch", +0: "tuner_early_stopping": 5, +0: "tuner_num_trials": 50, +0: "model_info_path": null, +0: "mp_size": 1, +0: "max_train_batch_size": null, +0: "min_train_batch_size": 1, +0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, +0: "min_train_micro_batch_size_per_gpu": 1, +0: "num_tuning_micro_batch_sizes": 3 +0: } +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] bfloat16_enabled ............. True +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] comms_config ................. +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] communication_data_type ...... None +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] compression_config ........... 
{'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa +0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] curriculum_enabled ........... False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] curriculum_params ............ False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] dataloader_drop_last ......... False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] disable_allgather ............ False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] dump_state ................... False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ 
bert.encoder.layer +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] elasticity_enabled ........... False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] flops_profiler_config ........ { +0: "enabled": false, +0: "profile_step": 1, +0: "module_depth": -1, +0: "top_modules": 1, +0: "detailed": true, +0: "output_file": null +0: } +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] fp16_auto_cast ............... None +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] fp16_enabled ................. False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] global_rank .................. 0 +0: [2023-03-13 23:18:12,458] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 1 +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] load_universal_checkpoint .... False +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] loss_scale ................... 1.0 +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] memory_breakdown ............. False +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] monitor_config ............... 
+0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] nebula_config ................ { +0: "enabled": false, +0: "persistent_storage_path": null, +0: "persistent_time_interval": 100, +0: "num_of_version_in_retention": 2, +0: "enable_nebula_load": true, +0: "load_path": null +0: } +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] optimizer_name ............... None +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] optimizer_params ............. None +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] pld_enabled .................. False +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] pld_params ................... False +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] prescale_gradients ........... False +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] scheduler_name ............... None +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] scheduler_params ............. None +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] sparse_attention ............. None +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] steps_per_print .............. 2000 +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] train_batch_size ............. 256 +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 4 +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] use_node_local_storage ....... False +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] wall_clock_breakdown ......... 
False +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] world_size ................... 64 +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] zero_enabled ................. False +0: [2023-03-13 23:18:12,459] [INFO] [config.py:1011:print] zero_optimization_stage ...... 
0 +0: [2023-03-13 23:18:12,459] [INFO] [config.py:996:print_user_config] json = { +0: "train_micro_batch_size_per_gpu": 4, +0: "train_batch_size": 256, +0: "gradient_clipping": 1.0, +0: "zero_optimization": { +0: "stage": 0 +0: }, +0: "bf16": { +0: "enabled": true +0: }, +0: "steps_per_print": 2.000000e+03, +0: "wall_clock_breakdown": false +0: } +0: Time to load utils op: 0.00042247772216796875 seconds +0: [2023-03-13 23:18:12,460] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=1 micro_batch_size=4 +0: [2023-03-13 23:18:12,470] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=22 [0, 22) STAGE_PARAMS=146525952 (146.526M) TOTAL_PARAMS=146525952 (146.526M) UNIQUE_PARAMS=146525952 (146.526M) +0: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: WARNING: could not find the metadata file checkpoints_146m14b100m +0: will not load any checkpoints and will start from random +4: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. 
+4: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +4: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. 
+6: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +4: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. 
+3: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +3: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +3: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. 
+3: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. 
+6: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +4: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +4: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2023-03-13 23:18:12,478] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. 
+0: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +3: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +4: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. 
+4: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +3: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +6: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. 
+7: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +0: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +5: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +2: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. 
+3: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +3: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +1: [2023-03-13 23:18:12,479] [WARNING] [engine.py:2581:load_checkpoint] Unable to find latest file at checkpoints_146m14b100m/latest, if trying to load latest checkpoint please ensure this file exists or pass an explicit checkpoint tag when loading a checkpoint. +7: time (ms) | load-checkpoint: 7.24 +0: estimated model parameters: 0.146525952 +0: estimated model parameters without embeddings: 0.106319616 +0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-03-13 23:18:12 +0: > building train, validation, and test datasets ... +0: > datasets target sizes (minimum size): +0: train: 5517578 +0: validation: 5632 +0: test: 256 +0: > building train, validation, and test datasets for GPT ... +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.007648 seconds +0: number of documents: 208931 +0: > dataset split: +0: train: +0: document indices in [0, 208931) total of 208931 documents +0: > WARNING: could not find index map files, building the indices on rank 0 ... 
+0: > last epoch number of samples (2627) is smaller than 95.0% of number of samples per epoch (48804), setting separate_last_epoch to True +0: > elasped time to build and save doc-idx mapping (seconds): 1.348066 +0: using: +0: number of documents: 208931 +0: number of epochs: 114 +0: sequence length: 2048 +0: total number of samples: 5563756 +0: > elasped time to build and save sample-idx mapping (seconds): 0.179848 +0: > building shuffle index with split [0, 5514951) and [5514951, 5563756) ... +0: > elasped time to build and save shuffle-idx mapping (seconds): 0.146082 +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_100M_text_document_train_indexmap_5517578ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_100M_text_document_train_indexmap_5517578ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_100M_text_document_train_indexmap_5517578ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.010 seconds +0: total number of samples: 5563757 +0: total number of epochs: 114 +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... 
+0: > finished creating indexed dataset in 0.044937 seconds +0: number of documents: 364608 +0: > dataset split: +0: validation: +0: document indices in [0, 364608) total of 364608 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_5632ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_5632ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_5632ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.070 seconds +0: total number of samples: 84978 +0: total number of epochs: 1 +0: > finished creating GPT datasets ... +0: [after dataloaders are built] datetime: 2023-03-13 23:18:25 +0: done with setup ... +0: training ... +0: Number of parameters: [tensor rank - pipeline rank] w/ and w/o embeddings: +7: time (ms) | model-and-optimizer-setup: 16706.86 | train/valid/test-data-iterators-setup: 12690.85 +0: [000-000] 0.1465B / 0.1063B +0: [before the start of training step] datetime: 2023-03-13 23:18:25 +0: [Rank 0] (after 10 iterations) memory (MB) | allocated: 2731.02197265625 | max allocated: 22586.583984375 | reserved: 23360.0 | max reserved: 23360.0 +7: iteration 10/ 21553 | consumed samples: 2560 | consumed tokens: 5242880 | elapsed time per iteration (s): 1.21 | learning rate: 9.279E-06 | global batch size: 256 | lm loss: 1.065629E+01 | grad norm: 5.312 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 212.151 | TFLOPs: 7.43 | +7: iteration 20/ 21553 | consumed samples: 5120 | consumed tokens: 10485760 | elapsed time per iteration (s): 0.32 | learning rate: 1.856E-05 | global batch size: 256 | lm loss: 9.737056E+00 | grad norm: 2.144 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 795.309 | TFLOPs: 27.84 | +7: iteration 30/ 21553 | consumed samples: 7680 | consumed tokens: 15728640 | elapsed time per iteration (s): 0.32 | learning rate: 2.784E-05 | global batch size: 256 | lm loss: 9.388904E+00 | grad norm: 1.897 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 808.437 | TFLOPs: 28.30 | +7: iteration 40/ 21553 | consumed samples: 10240 | consumed tokens: 20971520 | elapsed time per iteration (s): 0.33 | learning rate: 3.712E-05 | global batch size: 256 | lm loss: 9.008490E+00 | grad norm: 1.757 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 772.384 | TFLOPs: 27.04 | +7: iteration 50/ 21553 | consumed samples: 12800 | consumed tokens: 26214400 | elapsed time per iteration (s): 0.31 | learning rate: 4.640E-05 | global batch size: 256 | lm loss: 8.614461E+00 | grad norm: 1.541 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 817.034 | TFLOPs: 28.60 | +7: iteration 60/ 21553 | consumed samples: 15360 | consumed tokens: 31457280 | elapsed time per iteration (s): 0.32 | learning rate: 5.568E-05 | global batch size: 256 | lm loss: 8.212153E+00 | grad norm: 1.233 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 804.560 | TFLOPs: 28.17 | +7: iteration 70/ 21553 | consumed samples: 17920 | consumed tokens: 36700160 | elapsed time per iteration (s): 0.33 | learning rate: 6.496E-05 | global batch size: 256 | lm loss: 7.801801E+00 | grad norm: 1.360 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 780.625 | TFLOPs: 27.33 | +7: iteration 80/ 21553 | consumed samples: 20480 | consumed tokens: 41943040 | elapsed time per iteration (s): 0.30 | learning rate: 7.424E-05 | global batch size: 256 | lm loss: 7.488914E+00 | 
grad norm: 1.102 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.765 | TFLOPs: 29.85 | +7: iteration 90/ 21553 | consumed samples: 23040 | consumed tokens: 47185920 | elapsed time per iteration (s): 0.31 | learning rate: 8.351E-05 | global batch size: 256 | lm loss: 7.241519E+00 | grad norm: 0.580 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 813.678 | TFLOPs: 28.48 | +7: iteration 100/ 21553 | consumed samples: 25600 | consumed tokens: 52428800 | elapsed time per iteration (s): 0.30 | learning rate: 9.279E-05 | global batch size: 256 | lm loss: 7.095027E+00 | grad norm: 0.983 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.634 | TFLOPs: 29.88 | +7: iteration 110/ 21553 | consumed samples: 28160 | consumed tokens: 57671680 | elapsed time per iteration (s): 0.31 | learning rate: 1.021E-04 | global batch size: 256 | lm loss: 6.977024E+00 | grad norm: 0.800 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.990 | TFLOPs: 29.30 | +7: iteration 120/ 21553 | consumed samples: 30720 | consumed tokens: 62914560 | elapsed time per iteration (s): 0.31 | learning rate: 1.114E-04 | global batch size: 256 | lm loss: 6.859487E+00 | grad norm: 1.370 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.270 | TFLOPs: 28.89 | +7: iteration 130/ 21553 | consumed samples: 33280 | consumed tokens: 68157440 | elapsed time per iteration (s): 0.30 | learning rate: 1.206E-04 | global batch size: 256 | lm loss: 6.761553E+00 | grad norm: 0.661 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.391 | TFLOPs: 30.05 | +7: iteration 140/ 21553 | consumed samples: 35840 | consumed tokens: 73400320 | elapsed time per iteration (s): 0.30 | learning rate: 1.299E-04 
| global batch size: 256 | lm loss: 6.674681E+00 | grad norm: 0.386 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.537 | TFLOPs: 29.81 | +7: iteration 150/ 21553 | consumed samples: 38400 | consumed tokens: 78643200 | elapsed time per iteration (s): 0.30 | learning rate: 1.392E-04 | global batch size: 256 | lm loss: 6.628532E+00 | grad norm: 0.810 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.110 | TFLOPs: 29.41 | +7: iteration 160/ 21553 | consumed samples: 40960 | consumed tokens: 83886080 | elapsed time per iteration (s): 0.30 | learning rate: 1.485E-04 | global batch size: 256 | lm loss: 6.532838E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.884 | TFLOPs: 29.47 | +7: iteration 170/ 21553 | consumed samples: 43520 | consumed tokens: 89128960 | elapsed time per iteration (s): 0.32 | learning rate: 1.577E-04 | global batch size: 256 | lm loss: 6.488266E+00 | grad norm: 0.710 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 808.700 | TFLOPs: 28.31 | +7: iteration 180/ 21553 | consumed samples: 46080 | consumed tokens: 94371840 | elapsed time per iteration (s): 0.30 | learning rate: 1.670E-04 | global batch size: 256 | lm loss: 6.437945E+00 | grad norm: 0.663 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.140 | TFLOPs: 29.80 | +7: iteration 190/ 21553 | consumed samples: 48640 | consumed tokens: 99614720 | elapsed time per iteration (s): 0.30 | learning rate: 1.763E-04 | global batch size: 256 | lm loss: 6.400384E+00 | grad norm: 0.631 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.819 | TFLOPs: 29.40 | +7: iteration 200/ 21553 | consumed samples: 51200 | consumed tokens: 104857600 | elapsed 
time per iteration (s): 0.31 | learning rate: 1.856E-04 | global batch size: 256 | lm loss: 6.372983E+00 | grad norm: 0.680 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.000 | TFLOPs: 28.71 | +7: iteration 210/ 21553 | consumed samples: 53760 | consumed tokens: 110100480 | elapsed time per iteration (s): 0.31 | learning rate: 1.949E-04 | global batch size: 256 | lm loss: 6.315287E+00 | grad norm: 0.618 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.545 | TFLOPs: 29.11 | +7: iteration 220/ 21553 | consumed samples: 56320 | consumed tokens: 115343360 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 6.297536E+00 | grad norm: 0.615 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.338 | TFLOPs: 29.87 | +7: iteration 230/ 21553 | consumed samples: 58880 | consumed tokens: 120586240 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 6.249614E+00 | grad norm: 0.558 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.071 | TFLOPs: 28.95 | +7: iteration 240/ 21553 | consumed samples: 61440 | consumed tokens: 125829120 | elapsed time per iteration (s): 0.32 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 6.224615E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 794.573 | TFLOPs: 27.82 | +7: iteration 250/ 21553 | consumed samples: 64000 | consumed tokens: 131072000 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 6.187903E+00 | grad norm: 1.353 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.974 | TFLOPs: 29.37 | +7: iteration 260/ 21553 | 
consumed samples: 66560 | consumed tokens: 136314880 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 6.171604E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.630 | TFLOPs: 29.11 | +7: iteration 270/ 21553 | consumed samples: 69120 | consumed tokens: 141557760 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 6.141479E+00 | grad norm: 0.626 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.714 | TFLOPs: 29.43 | +7: iteration 280/ 21553 | consumed samples: 71680 | consumed tokens: 146800640 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 6.111045E+00 | grad norm: 0.499 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.394 | TFLOPs: 29.59 | +7: iteration 290/ 21553 | consumed samples: 74240 | consumed tokens: 152043520 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 6.074693E+00 | grad norm: 0.607 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.349 | TFLOPs: 29.84 | +7: iteration 300/ 21553 | consumed samples: 76800 | consumed tokens: 157286400 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 6.067384E+00 | grad norm: 0.722 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.987 | TFLOPs: 29.62 | +7: iteration 310/ 21553 | consumed samples: 79360 | consumed tokens: 162529280 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 6.063599E+00 | grad norm: 0.568 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per 
second: 851.725 | TFLOPs: 29.82 | +7: iteration 320/ 21553 | consumed samples: 81920 | consumed tokens: 167772160 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 6.008112E+00 | grad norm: 0.499 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.304 | TFLOPs: 29.49 | +7: iteration 330/ 21553 | consumed samples: 84480 | consumed tokens: 173015040 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.986457E+00 | grad norm: 0.666 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.111 | TFLOPs: 30.25 | +7: iteration 340/ 21553 | consumed samples: 87040 | consumed tokens: 178257920 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.969135E+00 | grad norm: 0.582 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.823 | TFLOPs: 30.10 | +7: iteration 350/ 21553 | consumed samples: 89600 | consumed tokens: 183500800 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.952858E+00 | grad norm: 0.964 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.080 | TFLOPs: 30.04 | +7: iteration 360/ 21553 | consumed samples: 92160 | consumed tokens: 188743680 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.921765E+00 | grad norm: 0.644 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.719 | TFLOPs: 29.57 | +7: iteration 370/ 21553 | consumed samples: 94720 | consumed tokens: 193986560 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.896795E+00 | grad norm: 0.553 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 850.218 | TFLOPs: 29.76 | +7: iteration 380/ 21553 | consumed samples: 97280 | consumed tokens: 199229440 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.867725E+00 | grad norm: 0.798 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.671 | TFLOPs: 29.50 | +7: iteration 390/ 21553 | consumed samples: 99840 | consumed tokens: 204472320 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.819263E+00 | grad norm: 0.399 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.116 | TFLOPs: 29.80 | +7: iteration 400/ 21553 | consumed samples: 102400 | consumed tokens: 209715200 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.812424E+00 | grad norm: 0.790 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.890 | TFLOPs: 29.58 | +7: iteration 410/ 21553 | consumed samples: 104960 | consumed tokens: 214958080 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.783761E+00 | grad norm: 0.644 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.760 | TFLOPs: 29.78 | +7: iteration 420/ 21553 | consumed samples: 107520 | consumed tokens: 220200960 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.749289E+00 | grad norm: 0.638 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.760 | TFLOPs: 30.17 | +7: iteration 430/ 21553 | consumed samples: 110080 | consumed tokens: 225443840 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 
5.730325E+00 | grad norm: 0.928 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.365 | TFLOPs: 29.77 | +7: iteration 440/ 21553 | consumed samples: 112640 | consumed tokens: 230686720 | elapsed time per iteration (s): 0.31 | learning rate: 2.000E-04 | global batch size: 256 | lm loss: 5.713474E+00 | grad norm: 0.843 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.844 | TFLOPs: 29.37 | +7: iteration 450/ 21553 | consumed samples: 115200 | consumed tokens: 235929600 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.674399E+00 | grad norm: 0.636 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.196 | TFLOPs: 30.04 | +7: iteration 460/ 21553 | consumed samples: 117760 | consumed tokens: 241172480 | elapsed time per iteration (s): 0.31 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.655425E+00 | grad norm: 0.828 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.241 | TFLOPs: 29.20 | +7: iteration 470/ 21553 | consumed samples: 120320 | consumed tokens: 246415360 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.637081E+00 | grad norm: 0.804 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.308 | TFLOPs: 29.80 | +7: iteration 480/ 21553 | consumed samples: 122880 | consumed tokens: 251658240 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.604951E+00 | grad norm: 0.526 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.283 | TFLOPs: 29.80 | +7: iteration 490/ 21553 | consumed samples: 125440 | consumed tokens: 256901120 | elapsed time per iteration (s): 
0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.580917E+00 | grad norm: 0.931 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.559 | TFLOPs: 30.09 | +7: iteration 500/ 21553 | consumed samples: 128000 | consumed tokens: 262144000 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.576551E+00 | grad norm: 0.772 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.138 | TFLOPs: 29.97 | +7: iteration 510/ 21553 | consumed samples: 130560 | consumed tokens: 267386880 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.539591E+00 | grad norm: 0.774 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.976 | TFLOPs: 30.07 | +7: iteration 520/ 21553 | consumed samples: 133120 | consumed tokens: 272629760 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.520704E+00 | grad norm: 0.687 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.358 | TFLOPs: 30.15 | +7: iteration 530/ 21553 | consumed samples: 135680 | consumed tokens: 277872640 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.490203E+00 | grad norm: 0.705 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.203 | TFLOPs: 29.83 | +7: iteration 540/ 21553 | consumed samples: 138240 | consumed tokens: 283115520 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.449935E+00 | grad norm: 1.016 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.197 | TFLOPs: 29.76 | +7: iteration 550/ 21553 | consumed samples: 
140800 | consumed tokens: 288358400 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.457282E+00 | grad norm: 1.146 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.730 | TFLOPs: 29.64 | +7: iteration 560/ 21553 | consumed samples: 143360 | consumed tokens: 293601280 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.430496E+00 | grad norm: 0.651 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.870 | TFLOPs: 29.54 | +7: iteration 570/ 21553 | consumed samples: 145920 | consumed tokens: 298844160 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.400878E+00 | grad norm: 0.760 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.994 | TFLOPs: 29.83 | +7: iteration 580/ 21553 | consumed samples: 148480 | consumed tokens: 304087040 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.368337E+00 | grad norm: 0.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.585 | TFLOPs: 30.37 | +7: iteration 590/ 21553 | consumed samples: 151040 | consumed tokens: 309329920 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.350875E+00 | grad norm: 0.975 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.717 | TFLOPs: 30.06 | +7: iteration 600/ 21553 | consumed samples: 153600 | consumed tokens: 314572800 | elapsed time per iteration (s): 0.30 | learning rate: 1.999E-04 | global batch size: 256 | lm loss: 5.323694E+00 | grad norm: 0.681 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 
866.956 | TFLOPs: 30.35 | +7: iteration 610/ 21553 | consumed samples: 156160 | consumed tokens: 319815680 | elapsed time per iteration (s): 0.30 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 5.303778E+00 | grad norm: 0.728 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.947 | TFLOPs: 30.00 | +7: iteration 620/ 21553 | consumed samples: 158720 | consumed tokens: 325058560 | elapsed time per iteration (s): 0.30 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 5.278022E+00 | grad norm: 0.698 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.947 | TFLOPs: 29.54 | +7: iteration 630/ 21553 | consumed samples: 161280 | consumed tokens: 330301440 | elapsed time per iteration (s): 0.30 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 5.257425E+00 | grad norm: 0.983 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.216 | TFLOPs: 29.41 | +7: iteration 640/ 21553 | consumed samples: 163840 | consumed tokens: 335544320 | elapsed time per iteration (s): 0.30 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 5.248305E+00 | grad norm: 0.617 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.915 | TFLOPs: 30.07 | +7: iteration 650/ 21553 | consumed samples: 166400 | consumed tokens: 340787200 | elapsed time per iteration (s): 0.30 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 5.228534E+00 | grad norm: 0.854 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.929 | TFLOPs: 29.82 | +7: iteration 660/ 21553 | consumed samples: 168960 | consumed tokens: 346030080 | elapsed time per iteration (s): 0.31 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 5.209506E+00 | grad norm: 0.770 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 834.945 | TFLOPs: 29.23 | +7: iteration 670/ 21553 | consumed samples: 171520 | consumed tokens: 351272960 | elapsed time per iteration (s): 0.30 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 5.186271E+00 | grad norm: 0.700 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.354 | TFLOPs: 29.52 | +7: iteration 680/ 21553 | consumed samples: 174080 | consumed tokens: 356515840 | elapsed time per iteration (s): 0.30 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 5.177826E+00 | grad norm: 0.905 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.107 | TFLOPs: 29.41 | +7: iteration 690/ 21553 | consumed samples: 176640 | consumed tokens: 361758720 | elapsed time per iteration (s): 0.30 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 5.159614E+00 | grad norm: 0.709 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.426 | TFLOPs: 29.67 | +7: iteration 700/ 21553 | consumed samples: 179200 | consumed tokens: 367001600 | elapsed time per iteration (s): 0.30 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 5.125995E+00 | grad norm: 1.245 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.612 | TFLOPs: 30.30 | +7: iteration 710/ 21553 | consumed samples: 181760 | consumed tokens: 372244480 | elapsed time per iteration (s): 0.30 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 5.116822E+00 | grad norm: 0.801 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.011 | TFLOPs: 29.58 | +7: iteration 720/ 21553 | consumed samples: 184320 | consumed tokens: 377487360 | elapsed time per iteration (s): 0.30 | learning rate: 1.998E-04 | global batch size: 256 | lm loss: 
5.076189E+00 | grad norm: 0.909 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.623 | TFLOPs: 29.85 | +7: iteration 730/ 21553 | consumed samples: 186880 | consumed tokens: 382730240 | elapsed time per iteration (s): 0.31 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 5.071044E+00 | grad norm: 0.741 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.906 | TFLOPs: 29.33 | +7: iteration 740/ 21553 | consumed samples: 189440 | consumed tokens: 387973120 | elapsed time per iteration (s): 0.30 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 5.052738E+00 | grad norm: 0.979 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.752 | TFLOPs: 30.31 | +7: iteration 750/ 21553 | consumed samples: 192000 | consumed tokens: 393216000 | elapsed time per iteration (s): 0.30 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 5.040396E+00 | grad norm: 0.907 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.427 | TFLOPs: 30.30 | +7: iteration 760/ 21553 | consumed samples: 194560 | consumed tokens: 398458880 | elapsed time per iteration (s): 0.31 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 5.014180E+00 | grad norm: 1.025 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.537 | TFLOPs: 28.97 | +7: iteration 770/ 21553 | consumed samples: 197120 | consumed tokens: 403701760 | elapsed time per iteration (s): 0.30 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 5.000209E+00 | grad norm: 0.670 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.112 | TFLOPs: 30.29 | +7: iteration 780/ 21553 | consumed samples: 199680 | consumed tokens: 408944640 | elapsed time per iteration (s): 
0.30 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 4.975867E+00 | grad norm: 1.123 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.669 | TFLOPs: 30.20 | +7: iteration 790/ 21553 | consumed samples: 202240 | consumed tokens: 414187520 | elapsed time per iteration (s): 0.30 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 4.949816E+00 | grad norm: 0.803 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.658 | TFLOPs: 29.46 | +7: iteration 800/ 21553 | consumed samples: 204800 | consumed tokens: 419430400 | elapsed time per iteration (s): 0.30 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 4.928473E+00 | grad norm: 0.873 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.859 | TFLOPs: 30.31 | +7: iteration 810/ 21553 | consumed samples: 207360 | consumed tokens: 424673280 | elapsed time per iteration (s): 0.30 | learning rate: 1.997E-04 | global batch size: 256 | lm loss: 4.918216E+00 | grad norm: 0.933 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.821 | TFLOPs: 30.24 | +7: iteration 820/ 21553 | consumed samples: 209920 | consumed tokens: 429916160 | elapsed time per iteration (s): 0.30 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 4.915439E+00 | grad norm: 0.975 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.655 | TFLOPs: 29.78 | +7: iteration 830/ 21553 | consumed samples: 212480 | consumed tokens: 435159040 | elapsed time per iteration (s): 0.30 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 4.877870E+00 | grad norm: 1.211 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.109 | TFLOPs: 29.97 | +7: iteration 840/ 21553 | consumed samples: 
215040 | consumed tokens: 440401920 | elapsed time per iteration (s): 0.30 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 4.853703E+00 | grad norm: 0.994 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.708 | TFLOPs: 30.38 | +7: iteration 850/ 21553 | consumed samples: 217600 | consumed tokens: 445644800 | elapsed time per iteration (s): 0.31 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 4.829906E+00 | grad norm: 0.827 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.238 | TFLOPs: 29.27 | +7: iteration 860/ 21553 | consumed samples: 220160 | consumed tokens: 450887680 | elapsed time per iteration (s): 0.29 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 4.809805E+00 | grad norm: 0.874 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.098 | TFLOPs: 30.39 | +7: iteration 870/ 21553 | consumed samples: 222720 | consumed tokens: 456130560 | elapsed time per iteration (s): 0.30 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 4.794161E+00 | grad norm: 0.630 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.848 | TFLOPs: 30.07 | +7: iteration 880/ 21553 | consumed samples: 225280 | consumed tokens: 461373440 | elapsed time per iteration (s): 0.30 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 4.763992E+00 | grad norm: 0.680 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.712 | TFLOPs: 30.38 | +7: iteration 890/ 21553 | consumed samples: 227840 | consumed tokens: 466616320 | elapsed time per iteration (s): 0.30 | learning rate: 1.996E-04 | global batch size: 256 | lm loss: 4.735321E+00 | grad norm: 0.932 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 
858.150 | TFLOPs: 30.04 | +7: iteration 900/ 21553 | consumed samples: 230400 | consumed tokens: 471859200 | elapsed time per iteration (s): 0.30 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 4.726157E+00 | grad norm: 0.853 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.394 | TFLOPs: 30.30 | +7: iteration 910/ 21553 | consumed samples: 232960 | consumed tokens: 477102080 | elapsed time per iteration (s): 0.30 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 4.717962E+00 | grad norm: 0.907 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.481 | TFLOPs: 29.77 | +7: iteration 920/ 21553 | consumed samples: 235520 | consumed tokens: 482344960 | elapsed time per iteration (s): 0.30 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 4.704901E+00 | grad norm: 0.702 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.559 | TFLOPs: 30.09 | +7: iteration 930/ 21553 | consumed samples: 238080 | consumed tokens: 487587840 | elapsed time per iteration (s): 0.30 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 4.677743E+00 | grad norm: 0.548 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.471 | TFLOPs: 29.74 | +7: iteration 940/ 21553 | consumed samples: 240640 | consumed tokens: 492830720 | elapsed time per iteration (s): 0.30 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 4.654146E+00 | grad norm: 0.622 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.399 | TFLOPs: 29.42 | +7: iteration 950/ 21553 | consumed samples: 243200 | consumed tokens: 498073600 | elapsed time per iteration (s): 0.30 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 4.626780E+00 | grad norm: 0.977 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 860.328 | TFLOPs: 30.12 | +7: iteration 960/ 21553 | consumed samples: 245760 | consumed tokens: 503316480 | elapsed time per iteration (s): 0.29 | learning rate: 1.995E-04 | global batch size: 256 | lm loss: 4.628882E+00 | grad norm: 1.078 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.871 | TFLOPs: 30.38 | +7: iteration 970/ 21553 | consumed samples: 248320 | consumed tokens: 508559360 | elapsed time per iteration (s): 0.30 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 4.623691E+00 | grad norm: 0.768 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.004 | TFLOPs: 29.93 | +7: iteration 980/ 21553 | consumed samples: 250880 | consumed tokens: 513802240 | elapsed time per iteration (s): 0.30 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 4.591221E+00 | grad norm: 0.617 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.875 | TFLOPs: 29.82 | +7: iteration 990/ 21553 | consumed samples: 253440 | consumed tokens: 519045120 | elapsed time per iteration (s): 0.30 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 4.592892E+00 | grad norm: 0.850 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.007 | TFLOPs: 30.07 | +7: iteration 1000/ 21553 | consumed samples: 256000 | consumed tokens: 524288000 | elapsed time per iteration (s): 0.30 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 4.583277E+00 | grad norm: 1.015 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.370 | TFLOPs: 30.15 | +7: ----------------------------------------------------------------------------------------------- +7: validation loss at iteration 1000 | lm loss value: 4.575716E+00 | lm loss PPL: 
9.709758E+01 | +7: ----------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 1000 to checkpoints_146m14b100m +0: [2023-03-13 23:23:37,649] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step1000 is begin to save! +0: [2023-03-13 23:23:37,665] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_01-model_00-model_states.pt... +0: [2023-03-13 23:23:37,771] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_01-model_00-model_states.pt. +0: [2023-03-13 23:23:37,772] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_03-model_00-model_states.pt... +0: [2023-03-13 23:23:37,789] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_03-model_00-model_states.pt. +0: [2023-03-13 23:23:37,789] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_04-model_00-model_states.pt... +0: [2023-03-13 23:23:37,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_04-model_00-model_states.pt. +0: [2023-03-13 23:23:37,804] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_05-model_00-model_states.pt... +0: [2023-03-13 23:23:37,819] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_05-model_00-model_states.pt. +0: [2023-03-13 23:23:37,819] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_06-model_00-model_states.pt... +0: [2023-03-13 23:23:37,834] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_06-model_00-model_states.pt. 
+0: [2023-03-13 23:23:37,834] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_07-model_00-model_states.pt... +0: [2023-03-13 23:23:37,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_07-model_00-model_states.pt. +0: [2023-03-13 23:23:37,849] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_08-model_00-model_states.pt... +0: [2023-03-13 23:23:37,864] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_08-model_00-model_states.pt. +0: [2023-03-13 23:23:37,864] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_09-model_00-model_states.pt... +0: [2023-03-13 23:23:37,879] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_09-model_00-model_states.pt. +0: [2023-03-13 23:23:37,879] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_10-model_00-model_states.pt... +0: [2023-03-13 23:23:37,894] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_10-model_00-model_states.pt. +0: [2023-03-13 23:23:37,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_11-model_00-model_states.pt... +0: [2023-03-13 23:23:37,909] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_11-model_00-model_states.pt. +0: [2023-03-13 23:23:37,909] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_12-model_00-model_states.pt... +0: [2023-03-13 23:23:37,924] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_12-model_00-model_states.pt. 
+0: [2023-03-13 23:23:37,924] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_13-model_00-model_states.pt... +0: [2023-03-13 23:23:37,940] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_13-model_00-model_states.pt. +0: [2023-03-13 23:23:37,940] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_14-model_00-model_states.pt... +0: [2023-03-13 23:23:37,955] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_14-model_00-model_states.pt. +0: [2023-03-13 23:23:37,955] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_15-model_00-model_states.pt... +0: [2023-03-13 23:23:37,970] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_15-model_00-model_states.pt. +0: [2023-03-13 23:23:37,971] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_16-model_00-model_states.pt... +0: [2023-03-13 23:23:37,986] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_16-model_00-model_states.pt. +0: [2023-03-13 23:23:37,986] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_17-model_00-model_states.pt... +0: [2023-03-13 23:23:38,001] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_17-model_00-model_states.pt. +0: [2023-03-13 23:23:38,001] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/layer_19-model_00-model_states.pt... +0: [2023-03-13 23:23:38,002] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/layer_19-model_00-model_states.pt. 
+0: [2023-03-13 23:23:38,003] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step1000/mp_rank_00_model_states.pt +0: [2023-03-13 23:23:38,003] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/mp_rank_00_model_states.pt... +0: [2023-03-13 23:23:38,006] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/mp_rank_00_model_states.pt. +0: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... 
+2: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:23:38,025] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:23:38,025] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:23:38,025] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... 
+1: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... 
+5: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:23:38,025] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... 
+4: [2023-03-13 23:23:38,025] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:23:38,025] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... 
+3: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:23:38,025] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:23:38,025] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:23:38,025] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:23:38,024] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... 
+2: [2023-03-13 23:23:38,025] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:23:38,057] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:23:38,068] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:23:38,068] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-13 23:23:38,068] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2023-03-13 23:23:38,068] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:23:38,068] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-13 23:23:38,068] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2023-03-13 23:23:38,068] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:23:38,068] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-13 23:23:38,068] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! 
+7: [2023-03-13 23:23:38,068] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:23:38,069] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-13 23:23:38,069] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:23:38,069] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2023-03-13 23:23:38,069] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:23:38,069] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +0: [2023-03-13 23:23:38,069] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +7: [2023-03-13 23:23:38,069] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2023-03-13 23:23:38,069] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2023-03-13 23:23:38,069] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. 
+0: [2023-03-13 23:23:38,069] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +7: [2023-03-13 23:23:38,069] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:23:38,069] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2023-03-13 23:23:38,069] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-13 23:23:38,069] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2023-03-13 23:23:38,070] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:23:38,070] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-13 23:23:38,070] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2023-03-13 23:23:38,071] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:23:38,071] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-13 23:23:38,071] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. 
+1: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:23:38,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2023-03-13 23:23:38,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-13 23:23:38,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-13 23:23:38,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:23:38,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-13 23:23:38,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +7: [2023-03-13 23:23:38,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:23:38,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-13 23:23:38,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2023-03-13 23:23:38,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:23:38,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:23:38,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. 
+1: [2023-03-13 23:23:38,075] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:23:38,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-13 23:23:38,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-13 23:23:38,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-13 23:23:38,075] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-13 23:23:38,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2023-03-13 23:23:38,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2023-03-13 23:23:38,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +1: [2023-03-13 23:23:38,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2023-03-13 23:23:38,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:23:38,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:23:38,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. 
+6: [2023-03-13 23:23:38,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:23:38,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:23:38,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:23:38,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-13 23:23:38,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-13 23:23:38,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:23:38,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-13 23:23:38,077] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2023-03-13 23:23:38,077] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! 
+6: [2023-03-13 23:23:38,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-13 23:23:38,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-13 23:23:38,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-13 23:23:38,077] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2023-03-13 23:23:38,077] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-13 23:23:38,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2023-03-13 23:23:38,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2023-03-13 23:23:38,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2023-03-13 23:23:38,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +6: [2023-03-13 23:23:38,078] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:23:38,078] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-13 23:23:38,078] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! 
+5: [2023-03-13 23:23:38,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:23:38,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:23:38,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:23:38,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:23:38,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:23:38,080] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:23:38,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-13 23:23:38,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-13 23:23:38,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-13 23:23:38,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-13 23:23:38,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-13 23:23:38,080] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-13 23:23:38,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2023-03-13 23:23:38,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2023-03-13 23:23:38,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2023-03-13 23:23:38,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2023-03-13 23:23:38,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2023-03-13 23:23:38,080] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! 
+4: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. 
+3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. 
+2: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +2: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. 
+2: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2023-03-13 23:23:38,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2023-03-13 23:23:38,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2023-03-13 23:23:38,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! 
+3: [2023-03-13 23:23:38,081] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +2: [2023-03-13 23:23:38,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2023-03-13 23:23:38,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2023-03-13 23:23:38,081] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2023-03-13 23:23:38,082] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +3: [2023-03-13 23:23:38,082] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:23:38,083] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2023-03-13 23:23:38,083] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:23:38,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-13 23:23:38,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-13 23:23:38,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-13 23:23:38,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-13 23:23:38,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-13 23:23:38,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2023-03-13 23:23:38,074] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! 
+0: [2023-03-13 23:23:38,086] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:23:38,086] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:23:38,086] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-13 23:23:38,086] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-13 23:23:38,086] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: [2023-03-13 23:23:38,086] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +2: [2023-03-13 23:23:38,091] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:23:38,091] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-13 23:23:38,091] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2023-03-13 23:23:38,091] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:23:38,091] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-13 23:23:38,091] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! 
+2: [2023-03-13 23:23:38,094] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:23:38,094] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-13 23:23:38,095] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2023-03-13 23:23:38,095] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:23:38,095] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +4: [2023-03-13 23:23:38,095] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +0: [2023-03-13 23:23:38,095] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +4: [2023-03-13 23:23:38,095] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2023-03-13 23:23:38,096] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:23:38,097] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-13 23:23:38,097] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +5: [2023-03-13 23:23:38,102] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:23:38,102] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step1000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-13 23:23:38,102] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step1000 is ready now! +0: successfully saved checkpoint at iteration 1000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 462.69 +7: iteration 1010/ 21553 | consumed samples: 258560 | consumed tokens: 529530880 | elapsed time per iteration (s): 0.35 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 4.572918E+00 | grad norm: 0.610 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 731.108 | TFLOPs: 25.59 | +7: iteration 1020/ 21553 | consumed samples: 261120 | consumed tokens: 534773760 | elapsed time per iteration (s): 0.30 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 4.559391E+00 | grad norm: 0.858 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.737 | TFLOPs: 30.38 | +7: iteration 1030/ 21553 | consumed samples: 263680 | consumed tokens: 540016640 | elapsed time per iteration (s): 0.30 | learning rate: 1.994E-04 | global batch size: 256 | lm loss: 4.544674E+00 | grad norm: 0.513 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.857 | TFLOPs: 30.03 | +7: iteration 1040/ 21553 | consumed samples: 266240 | consumed tokens: 545259520 | elapsed time per iteration (s): 0.30 | learning rate: 1.993E-04 | global batch size: 256 | lm loss: 4.508098E+00 | grad norm: 0.765 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.096 | TFLOPs: 30.07 | +7: iteration 1050/ 21553 | consumed samples: 268800 | consumed tokens: 550502400 | elapsed time per iteration (s): 0.30 | learning rate: 1.993E-04 | global batch size: 256 | lm loss: 
4.516635E+00 | grad norm: 0.845 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.243 | TFLOPs: 30.11 | +7: iteration 1060/ 21553 | consumed samples: 271360 | consumed tokens: 555745280 | elapsed time per iteration (s): 0.30 | learning rate: 1.993E-04 | global batch size: 256 | lm loss: 4.506526E+00 | grad norm: 0.677 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.362 | TFLOPs: 30.36 | +7: iteration 1070/ 21553 | consumed samples: 273920 | consumed tokens: 560988160 | elapsed time per iteration (s): 0.30 | learning rate: 1.993E-04 | global batch size: 256 | lm loss: 4.491579E+00 | grad norm: 0.736 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.221 | TFLOPs: 29.73 | +7: iteration 1080/ 21553 | consumed samples: 276480 | consumed tokens: 566231040 | elapsed time per iteration (s): 0.30 | learning rate: 1.993E-04 | global batch size: 256 | lm loss: 4.493756E+00 | grad norm: 0.758 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.511 | TFLOPs: 30.26 | +7: iteration 1090/ 21553 | consumed samples: 279040 | consumed tokens: 571473920 | elapsed time per iteration (s): 0.30 | learning rate: 1.993E-04 | global batch size: 256 | lm loss: 4.467576E+00 | grad norm: 0.578 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.687 | TFLOPs: 29.92 | +7: iteration 1100/ 21553 | consumed samples: 281600 | consumed tokens: 576716800 | elapsed time per iteration (s): 0.30 | learning rate: 1.992E-04 | global batch size: 256 | lm loss: 4.466705E+00 | grad norm: 0.608 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.072 | TFLOPs: 30.04 | +7: iteration 1110/ 21553 | consumed samples: 284160 | consumed tokens: 581959680 | elapsed time per iteration 
(s): 0.30 | learning rate: 1.992E-04 | global batch size: 256 | lm loss: 4.452636E+00 | grad norm: 0.695 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.776 | TFLOPs: 30.13 | +7: iteration 1120/ 21553 | consumed samples: 286720 | consumed tokens: 587202560 | elapsed time per iteration (s): 0.29 | learning rate: 1.992E-04 | global batch size: 256 | lm loss: 4.449242E+00 | grad norm: 0.822 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.001 | TFLOPs: 30.39 | +7: iteration 1130/ 21553 | consumed samples: 289280 | consumed tokens: 592445440 | elapsed time per iteration (s): 0.30 | learning rate: 1.992E-04 | global batch size: 256 | lm loss: 4.445512E+00 | grad norm: 0.628 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.752 | TFLOPs: 30.03 | +7: iteration 1140/ 21553 | consumed samples: 291840 | consumed tokens: 597688320 | elapsed time per iteration (s): 0.30 | learning rate: 1.992E-04 | global batch size: 256 | lm loss: 4.423621E+00 | grad norm: 0.657 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.480 | TFLOPs: 30.37 | +7: iteration 1150/ 21553 | consumed samples: 294400 | consumed tokens: 602931200 | elapsed time per iteration (s): 0.30 | learning rate: 1.991E-04 | global batch size: 256 | lm loss: 4.421928E+00 | grad norm: 0.796 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.268 | TFLOPs: 30.26 | +7: iteration 1160/ 21553 | consumed samples: 296960 | consumed tokens: 608174080 | elapsed time per iteration (s): 0.30 | learning rate: 1.991E-04 | global batch size: 256 | lm loss: 4.414553E+00 | grad norm: 0.712 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.429 | TFLOPs: 29.95 | +7: iteration 1170/ 21553 | consumed 
samples: 299520 | consumed tokens: 613416960 | elapsed time per iteration (s): 0.30 | learning rate: 1.991E-04 | global batch size: 256 | lm loss: 4.399873E+00 | grad norm: 0.724 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.859 | TFLOPs: 30.00 | +7: iteration 1180/ 21553 | consumed samples: 302080 | consumed tokens: 618659840 | elapsed time per iteration (s): 0.30 | learning rate: 1.991E-04 | global batch size: 256 | lm loss: 4.397679E+00 | grad norm: 0.856 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.434 | TFLOPs: 30.26 | +7: iteration 1190/ 21553 | consumed samples: 304640 | consumed tokens: 623902720 | elapsed time per iteration (s): 0.29 | learning rate: 1.991E-04 | global batch size: 256 | lm loss: 4.394102E+00 | grad norm: 0.597 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.071 | TFLOPs: 30.39 | +7: iteration 1200/ 21553 | consumed samples: 307200 | consumed tokens: 629145600 | elapsed time per iteration (s): 0.30 | learning rate: 1.991E-04 | global batch size: 256 | lm loss: 4.370040E+00 | grad norm: 0.817 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.503 | TFLOPs: 30.02 | +7: iteration 1210/ 21553 | consumed samples: 309760 | consumed tokens: 634388480 | elapsed time per iteration (s): 0.30 | learning rate: 1.990E-04 | global batch size: 256 | lm loss: 4.377884E+00 | grad norm: 0.598 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.131 | TFLOPs: 30.04 | +7: iteration 1220/ 21553 | consumed samples: 312320 | consumed tokens: 639631360 | elapsed time per iteration (s): 0.30 | learning rate: 1.990E-04 | global batch size: 256 | lm loss: 4.372319E+00 | grad norm: 0.686 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per 
second: 857.396 | TFLOPs: 30.02 | +7: iteration 1230/ 21553 | consumed samples: 314880 | consumed tokens: 644874240 | elapsed time per iteration (s): 0.30 | learning rate: 1.990E-04 | global batch size: 256 | lm loss: 4.344854E+00 | grad norm: 0.702 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.441 | TFLOPs: 29.74 | +7: iteration 1240/ 21553 | consumed samples: 317440 | consumed tokens: 650117120 | elapsed time per iteration (s): 0.30 | learning rate: 1.990E-04 | global batch size: 256 | lm loss: 4.356188E+00 | grad norm: 0.593 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.120 | TFLOPs: 29.83 | +7: iteration 1250/ 21553 | consumed samples: 320000 | consumed tokens: 655360000 | elapsed time per iteration (s): 0.30 | learning rate: 1.990E-04 | global batch size: 256 | lm loss: 4.335840E+00 | grad norm: 0.548 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.901 | TFLOPs: 30.03 | +7: iteration 1260/ 21553 | consumed samples: 322560 | consumed tokens: 660602880 | elapsed time per iteration (s): 0.30 | learning rate: 1.989E-04 | global batch size: 256 | lm loss: 4.328419E+00 | grad norm: 0.809 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.619 | TFLOPs: 30.02 | +7: iteration 1270/ 21553 | consumed samples: 325120 | consumed tokens: 665845760 | elapsed time per iteration (s): 0.30 | learning rate: 1.989E-04 | global batch size: 256 | lm loss: 4.334599E+00 | grad norm: 0.550 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.252 | TFLOPs: 29.80 | +7: iteration 1280/ 21553 | consumed samples: 327680 | consumed tokens: 671088640 | elapsed time per iteration (s): 0.30 | learning rate: 1.989E-04 | global batch size: 256 | lm loss: 4.333066E+00 | grad norm: 0.693 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.948 | TFLOPs: 30.07 | +7: iteration 1290/ 21553 | consumed samples: 330240 | consumed tokens: 676331520 | elapsed time per iteration (s): 0.30 | learning rate: 1.989E-04 | global batch size: 256 | lm loss: 4.320369E+00 | grad norm: 0.578 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.004 | TFLOPs: 30.35 | +7: iteration 1300/ 21553 | consumed samples: 332800 | consumed tokens: 681574400 | elapsed time per iteration (s): 0.30 | learning rate: 1.989E-04 | global batch size: 256 | lm loss: 4.293312E+00 | grad norm: 0.578 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.681 | TFLOPs: 29.99 | +7: iteration 1310/ 21553 | consumed samples: 335360 | consumed tokens: 686817280 | elapsed time per iteration (s): 0.30 | learning rate: 1.988E-04 | global batch size: 256 | lm loss: 4.294207E+00 | grad norm: 0.675 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.964 | TFLOPs: 29.72 | +7: iteration 1320/ 21553 | consumed samples: 337920 | consumed tokens: 692060160 | elapsed time per iteration (s): 0.29 | learning rate: 1.988E-04 | global batch size: 256 | lm loss: 4.276421E+00 | grad norm: 0.706 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.138 | TFLOPs: 30.39 | +7: iteration 1330/ 21553 | consumed samples: 340480 | consumed tokens: 697303040 | elapsed time per iteration (s): 0.30 | learning rate: 1.988E-04 | global batch size: 256 | lm loss: 4.285069E+00 | grad norm: 0.650 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.802 | TFLOPs: 30.06 | +7: iteration 1340/ 21553 | consumed samples: 343040 | consumed tokens: 702545920 | elapsed time per iteration (s): 0.30 | learning rate: 1.988E-04 | global batch 
size: 256 | lm loss: 4.280922E+00 | grad norm: 0.660 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.532 | TFLOPs: 30.37 | +7: iteration 1350/ 21553 | consumed samples: 345600 | consumed tokens: 707788800 | elapsed time per iteration (s): 0.30 | learning rate: 1.987E-04 | global batch size: 256 | lm loss: 4.283808E+00 | grad norm: 0.680 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.897 | TFLOPs: 30.28 | +7: iteration 1360/ 21553 | consumed samples: 348160 | consumed tokens: 713031680 | elapsed time per iteration (s): 0.30 | learning rate: 1.987E-04 | global batch size: 256 | lm loss: 4.277126E+00 | grad norm: 0.596 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.635 | TFLOPs: 29.99 | +7: iteration 1370/ 21553 | consumed samples: 350720 | consumed tokens: 718274560 | elapsed time per iteration (s): 0.30 | learning rate: 1.987E-04 | global batch size: 256 | lm loss: 4.262143E+00 | grad norm: 0.635 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.048 | TFLOPs: 30.00 | +7: iteration 1380/ 21553 | consumed samples: 353280 | consumed tokens: 723517440 | elapsed time per iteration (s): 0.30 | learning rate: 1.987E-04 | global batch size: 256 | lm loss: 4.244954E+00 | grad norm: 0.624 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.181 | TFLOPs: 29.97 | +7: iteration 1390/ 21553 | consumed samples: 355840 | consumed tokens: 728760320 | elapsed time per iteration (s): 0.30 | learning rate: 1.987E-04 | global batch size: 256 | lm loss: 4.249236E+00 | grad norm: 0.596 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.980 | TFLOPs: 30.35 | +7: iteration 1400/ 21553 | consumed samples: 358400 | consumed tokens: 734003200 | elapsed 
time per iteration (s): 0.30 | learning rate: 1.986E-04 | global batch size: 256 | lm loss: 4.217907E+00 | grad norm: 0.759 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.216 | TFLOPs: 30.01 | +7: iteration 1410/ 21553 | consumed samples: 360960 | consumed tokens: 739246080 | elapsed time per iteration (s): 0.30 | learning rate: 1.986E-04 | global batch size: 256 | lm loss: 4.236289E+00 | grad norm: 0.556 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.744 | TFLOPs: 30.38 | +7: iteration 1420/ 21553 | consumed samples: 363520 | consumed tokens: 744488960 | elapsed time per iteration (s): 0.30 | learning rate: 1.986E-04 | global batch size: 256 | lm loss: 4.240931E+00 | grad norm: 0.758 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.358 | TFLOPs: 29.73 | +7: iteration 1430/ 21553 | consumed samples: 366080 | consumed tokens: 749731840 | elapsed time per iteration (s): 0.29 | learning rate: 1.986E-04 | global batch size: 256 | lm loss: 4.243861E+00 | grad norm: 0.593 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.176 | TFLOPs: 30.39 | +7: iteration 1440/ 21553 | consumed samples: 368640 | consumed tokens: 754974720 | elapsed time per iteration (s): 0.30 | learning rate: 1.985E-04 | global batch size: 256 | lm loss: 4.223981E+00 | grad norm: 0.599 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.532 | TFLOPs: 29.77 | +7: iteration 1450/ 21553 | consumed samples: 371200 | consumed tokens: 760217600 | elapsed time per iteration (s): 0.29 | learning rate: 1.985E-04 | global batch size: 256 | lm loss: 4.221473E+00 | grad norm: 0.568 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.275 | TFLOPs: 30.40 | +7: iteration 1460/ 
21553 | consumed samples: 373760 | consumed tokens: 765460480 | elapsed time per iteration (s): 0.30 | learning rate: 1.985E-04 | global batch size: 256 | lm loss: 4.199206E+00 | grad norm: 0.589 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.420 | TFLOPs: 29.98 | +7: iteration 1470/ 21553 | consumed samples: 376320 | consumed tokens: 770703360 | elapsed time per iteration (s): 0.30 | learning rate: 1.985E-04 | global batch size: 256 | lm loss: 4.203186E+00 | grad norm: 0.624 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.888 | TFLOPs: 30.17 | +7: iteration 1480/ 21553 | consumed samples: 378880 | consumed tokens: 775946240 | elapsed time per iteration (s): 0.30 | learning rate: 1.984E-04 | global batch size: 256 | lm loss: 4.209589E+00 | grad norm: 0.679 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.215 | TFLOPs: 29.97 | +7: iteration 1490/ 21553 | consumed samples: 381440 | consumed tokens: 781189120 | elapsed time per iteration (s): 0.30 | learning rate: 1.984E-04 | global batch size: 256 | lm loss: 4.190160E+00 | grad norm: 0.857 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.567 | TFLOPs: 29.85 | +7: iteration 1500/ 21553 | consumed samples: 384000 | consumed tokens: 786432000 | elapsed time per iteration (s): 0.29 | learning rate: 1.984E-04 | global batch size: 256 | lm loss: 4.213414E+00 | grad norm: 0.627 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.924 | TFLOPs: 30.42 | +7: iteration 1510/ 21553 | consumed samples: 386560 | consumed tokens: 791674880 | elapsed time per iteration (s): 0.30 | learning rate: 1.984E-04 | global batch size: 256 | lm loss: 4.178724E+00 | grad norm: 0.547 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 
0 | samples per second: 865.708 | TFLOPs: 30.31 | +7: iteration 1520/ 21553 | consumed samples: 389120 | consumed tokens: 796917760 | elapsed time per iteration (s): 0.30 | learning rate: 1.983E-04 | global batch size: 256 | lm loss: 4.176616E+00 | grad norm: 0.649 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.925 | TFLOPs: 29.72 | +7: iteration 1530/ 21553 | consumed samples: 391680 | consumed tokens: 802160640 | elapsed time per iteration (s): 0.30 | learning rate: 1.983E-04 | global batch size: 256 | lm loss: 4.166917E+00 | grad norm: 0.561 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.701 | TFLOPs: 30.24 | +7: iteration 1540/ 21553 | consumed samples: 394240 | consumed tokens: 807403520 | elapsed time per iteration (s): 0.30 | learning rate: 1.983E-04 | global batch size: 256 | lm loss: 4.179015E+00 | grad norm: 0.530 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.808 | TFLOPs: 29.99 | +7: iteration 1550/ 21553 | consumed samples: 396800 | consumed tokens: 812646400 | elapsed time per iteration (s): 0.30 | learning rate: 1.983E-04 | global batch size: 256 | lm loss: 4.159331E+00 | grad norm: 0.547 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.574 | TFLOPs: 30.09 | +7: iteration 1560/ 21553 | consumed samples: 399360 | consumed tokens: 817889280 | elapsed time per iteration (s): 0.30 | learning rate: 1.982E-04 | global batch size: 256 | lm loss: 4.158260E+00 | grad norm: 0.798 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.618 | TFLOPs: 30.30 | +7: iteration 1570/ 21553 | consumed samples: 401920 | consumed tokens: 823132160 | elapsed time per iteration (s): 0.30 | learning rate: 1.982E-04 | global batch size: 256 | lm loss: 4.174210E+00 | grad norm: 0.553 | num 
zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.620 | TFLOPs: 30.23 | +7: iteration 1580/ 21553 | consumed samples: 404480 | consumed tokens: 828375040 | elapsed time per iteration (s): 0.29 | learning rate: 1.982E-04 | global batch size: 256 | lm loss: 4.146910E+00 | grad norm: 0.496 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.156 | TFLOPs: 30.39 | +7: iteration 1590/ 21553 | consumed samples: 407040 | consumed tokens: 833617920 | elapsed time per iteration (s): 0.29 | learning rate: 1.982E-04 | global batch size: 256 | lm loss: 4.146625E+00 | grad norm: 0.637 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.923 | TFLOPs: 30.42 | +7: iteration 1600/ 21553 | consumed samples: 409600 | consumed tokens: 838860800 | elapsed time per iteration (s): 0.29 | learning rate: 1.981E-04 | global batch size: 256 | lm loss: 4.140623E+00 | grad norm: 0.786 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.172 | TFLOPs: 30.39 | +7: iteration 1610/ 21553 | consumed samples: 412160 | consumed tokens: 844103680 | elapsed time per iteration (s): 0.29 | learning rate: 1.981E-04 | global batch size: 256 | lm loss: 4.141808E+00 | grad norm: 0.552 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.438 | TFLOPs: 30.40 | +7: iteration 1620/ 21553 | consumed samples: 414720 | consumed tokens: 849346560 | elapsed time per iteration (s): 0.30 | learning rate: 1.981E-04 | global batch size: 256 | lm loss: 4.138956E+00 | grad norm: 0.813 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.922 | TFLOPs: 30.07 | +7: iteration 1630/ 21553 | consumed samples: 417280 | consumed tokens: 854589440 | elapsed time per iteration (s): 0.30 | learning rate: 1.981E-04 | 
global batch size: 256 | lm loss: 4.132388E+00 | grad norm: 0.558 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.698 | TFLOPs: 30.31 | +7: iteration 1640/ 21553 | consumed samples: 419840 | consumed tokens: 859832320 | elapsed time per iteration (s): 0.29 | learning rate: 1.980E-04 | global batch size: 256 | lm loss: 4.132615E+00 | grad norm: 0.699 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.269 | TFLOPs: 30.40 | +7: iteration 1650/ 21553 | consumed samples: 422400 | consumed tokens: 865075200 | elapsed time per iteration (s): 0.30 | learning rate: 1.980E-04 | global batch size: 256 | lm loss: 4.115030E+00 | grad norm: 0.511 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.067 | TFLOPs: 29.93 | +7: iteration 1660/ 21553 | consumed samples: 424960 | consumed tokens: 870318080 | elapsed time per iteration (s): 0.30 | learning rate: 1.980E-04 | global batch size: 256 | lm loss: 4.106730E+00 | grad norm: 0.563 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.941 | TFLOPs: 30.28 | +7: iteration 1670/ 21553 | consumed samples: 427520 | consumed tokens: 875560960 | elapsed time per iteration (s): 0.30 | learning rate: 1.979E-04 | global batch size: 256 | lm loss: 4.109325E+00 | grad norm: 0.519 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.324 | TFLOPs: 30.01 | +7: iteration 1680/ 21553 | consumed samples: 430080 | consumed tokens: 880803840 | elapsed time per iteration (s): 0.30 | learning rate: 1.979E-04 | global batch size: 256 | lm loss: 4.110499E+00 | grad norm: 0.507 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.098 | TFLOPs: 30.28 | +7: iteration 1690/ 21553 | consumed samples: 432640 | consumed tokens: 
886046720 | elapsed time per iteration (s): 0.30 | learning rate: 1.979E-04 | global batch size: 256 | lm loss: 4.098391E+00 | grad norm: 0.602 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.232 | TFLOPs: 30.29 | +7: iteration 1700/ 21553 | consumed samples: 435200 | consumed tokens: 891289600 | elapsed time per iteration (s): 0.30 | learning rate: 1.979E-04 | global batch size: 256 | lm loss: 4.114236E+00 | grad norm: 0.649 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.702 | TFLOPs: 30.38 | +7: iteration 1710/ 21553 | consumed samples: 437760 | consumed tokens: 896532480 | elapsed time per iteration (s): 0.30 | learning rate: 1.978E-04 | global batch size: 256 | lm loss: 4.117744E+00 | grad norm: 0.551 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.786 | TFLOPs: 30.38 | +7: iteration 1720/ 21553 | consumed samples: 440320 | consumed tokens: 901775360 | elapsed time per iteration (s): 0.29 | learning rate: 1.978E-04 | global batch size: 256 | lm loss: 4.082025E+00 | grad norm: 0.630 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.895 | TFLOPs: 30.38 | +7: iteration 1730/ 21553 | consumed samples: 442880 | consumed tokens: 907018240 | elapsed time per iteration (s): 0.30 | learning rate: 1.978E-04 | global batch size: 256 | lm loss: 4.080646E+00 | grad norm: 0.539 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.619 | TFLOPs: 30.37 | +7: iteration 1740/ 21553 | consumed samples: 445440 | consumed tokens: 912261120 | elapsed time per iteration (s): 0.29 | learning rate: 1.977E-04 | global batch size: 256 | lm loss: 4.063961E+00 | grad norm: 0.557 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.081 | TFLOPs: 30.39 | 
+7: iteration 1750/ 21553 | consumed samples: 448000 | consumed tokens: 917504000 | elapsed time per iteration (s): 0.29 | learning rate: 1.977E-04 | global batch size: 256 | lm loss: 4.080761E+00 | grad norm: 0.591 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.825 | TFLOPs: 30.38 | +7: iteration 1760/ 21553 | consumed samples: 450560 | consumed tokens: 922746880 | elapsed time per iteration (s): 0.30 | learning rate: 1.977E-04 | global batch size: 256 | lm loss: 4.065882E+00 | grad norm: 0.670 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.735 | TFLOPs: 30.31 | +7: iteration 1770/ 21553 | consumed samples: 453120 | consumed tokens: 927989760 | elapsed time per iteration (s): 0.30 | learning rate: 1.977E-04 | global batch size: 256 | lm loss: 4.070360E+00 | grad norm: 0.584 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.517 | TFLOPs: 30.37 | +7: iteration 1780/ 21553 | consumed samples: 455680 | consumed tokens: 933232640 | elapsed time per iteration (s): 0.30 | learning rate: 1.976E-04 | global batch size: 256 | lm loss: 4.061099E+00 | grad norm: 0.504 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.541 | TFLOPs: 30.06 | +7: iteration 1790/ 21553 | consumed samples: 458240 | consumed tokens: 938475520 | elapsed time per iteration (s): 0.30 | learning rate: 1.976E-04 | global batch size: 256 | lm loss: 4.069643E+00 | grad norm: 0.593 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.787 | TFLOPs: 30.38 | +7: iteration 1800/ 21553 | consumed samples: 460800 | consumed tokens: 943718400 | elapsed time per iteration (s): 0.30 | learning rate: 1.976E-04 | global batch size: 256 | lm loss: 4.059079E+00 | grad norm: 0.524 | num zeros: 0.0 | number of skipped iterations: 0 | 
number of nan iterations: 0 | samples per second: 867.677 | TFLOPs: 30.37 | +7: iteration 1810/ 21553 | consumed samples: 463360 | consumed tokens: 948961280 | elapsed time per iteration (s): 0.30 | learning rate: 1.975E-04 | global batch size: 256 | lm loss: 4.057092E+00 | grad norm: 0.658 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.997 | TFLOPs: 30.07 | +7: iteration 1820/ 21553 | consumed samples: 465920 | consumed tokens: 954204160 | elapsed time per iteration (s): 0.30 | learning rate: 1.975E-04 | global batch size: 256 | lm loss: 4.041616E+00 | grad norm: 0.502 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.355 | TFLOPs: 30.08 | +7: iteration 1830/ 21553 | consumed samples: 468480 | consumed tokens: 959447040 | elapsed time per iteration (s): 0.30 | learning rate: 1.975E-04 | global batch size: 256 | lm loss: 4.044041E+00 | grad norm: 0.576 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.194 | TFLOPs: 30.36 | +7: iteration 1840/ 21553 | consumed samples: 471040 | consumed tokens: 964689920 | elapsed time per iteration (s): 0.30 | learning rate: 1.974E-04 | global batch size: 256 | lm loss: 4.043372E+00 | grad norm: 0.621 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.131 | TFLOPs: 30.36 | +7: iteration 1850/ 21553 | consumed samples: 473600 | consumed tokens: 969932800 | elapsed time per iteration (s): 0.30 | learning rate: 1.974E-04 | global batch size: 256 | lm loss: 4.031876E+00 | grad norm: 0.529 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.338 | TFLOPs: 30.36 | +7: iteration 1860/ 21553 | consumed samples: 476160 | consumed tokens: 975175680 | elapsed time per iteration (s): 0.30 | learning rate: 1.974E-04 | global batch size: 256 | lm loss: 4.027157E+00 | 
grad norm: 0.577 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.725 | TFLOPs: 30.38 | +7: iteration 1870/ 21553 | consumed samples: 478720 | consumed tokens: 980418560 | elapsed time per iteration (s): 0.30 | learning rate: 1.973E-04 | global batch size: 256 | lm loss: 4.015795E+00 | grad norm: 0.553 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.720 | TFLOPs: 30.34 | +7: iteration 1880/ 21553 | consumed samples: 481280 | consumed tokens: 985661440 | elapsed time per iteration (s): 0.30 | learning rate: 1.973E-04 | global batch size: 256 | lm loss: 4.034296E+00 | grad norm: 0.592 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.632 | TFLOPs: 30.37 | +7: iteration 1890/ 21553 | consumed samples: 483840 | consumed tokens: 990904320 | elapsed time per iteration (s): 0.29 | learning rate: 1.973E-04 | global batch size: 256 | lm loss: 4.032486E+00 | grad norm: 0.626 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.046 | TFLOPs: 30.39 | +7: iteration 1900/ 21553 | consumed samples: 486400 | consumed tokens: 996147200 | elapsed time per iteration (s): 0.30 | learning rate: 1.972E-04 | global batch size: 256 | lm loss: 4.020764E+00 | grad norm: 0.671 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.332 | TFLOPs: 29.94 | +7: iteration 1910/ 21553 | consumed samples: 488960 | consumed tokens: 1001390080 | elapsed time per iteration (s): 0.30 | learning rate: 1.972E-04 | global batch size: 256 | lm loss: 4.023343E+00 | grad norm: 0.549 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.179 | TFLOPs: 30.36 | +7: iteration 1920/ 21553 | consumed samples: 491520 | consumed tokens: 1006632960 | elapsed time per iteration (s): 0.30 | 
learning rate: 1.972E-04 | global batch size: 256 | lm loss: 4.016448E+00 | grad norm: 0.528 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.118 | TFLOPs: 30.29 | +7: iteration 1930/ 21553 | consumed samples: 494080 | consumed tokens: 1011875840 | elapsed time per iteration (s): 0.31 | learning rate: 1.971E-04 | global batch size: 256 | lm loss: 4.024774E+00 | grad norm: 0.561 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 825.583 | TFLOPs: 28.90 | +7: iteration 1940/ 21553 | consumed samples: 496640 | consumed tokens: 1017118720 | elapsed time per iteration (s): 0.30 | learning rate: 1.971E-04 | global batch size: 256 | lm loss: 4.015565E+00 | grad norm: 0.638 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.475 | TFLOPs: 30.37 | +7: iteration 1950/ 21553 | consumed samples: 499200 | consumed tokens: 1022361600 | elapsed time per iteration (s): 0.30 | learning rate: 1.971E-04 | global batch size: 256 | lm loss: 4.001971E+00 | grad norm: 0.638 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.368 | TFLOPs: 30.12 | +7: iteration 1960/ 21553 | consumed samples: 501760 | consumed tokens: 1027604480 | elapsed time per iteration (s): 0.30 | learning rate: 1.970E-04 | global batch size: 256 | lm loss: 4.000729E+00 | grad norm: 0.540 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.316 | TFLOPs: 30.01 | +7: iteration 1970/ 21553 | consumed samples: 504320 | consumed tokens: 1032847360 | elapsed time per iteration (s): 0.29 | learning rate: 1.970E-04 | global batch size: 256 | lm loss: 4.000861E+00 | grad norm: 0.556 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.464 | TFLOPs: 30.40 | +7: iteration 1980/ 21553 | consumed samples: 
506880 | consumed tokens: 1038090240 | elapsed time per iteration (s): 0.29 | learning rate: 1.970E-04 | global batch size: 256 | lm loss: 3.993961E+00 | grad norm: 0.592 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.860 | TFLOPs: 30.38 | +7: iteration 1990/ 21553 | consumed samples: 509440 | consumed tokens: 1043333120 | elapsed time per iteration (s): 0.30 | learning rate: 1.969E-04 | global batch size: 256 | lm loss: 3.979549E+00 | grad norm: 0.663 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.335 | TFLOPs: 30.36 | +0: [2023-03-13 23:28:35,141] [INFO] [logging.py:68:log_dist] [Rank 0] step=2000, skipped=0, lr=[0.00019691153209285103, 0.00019691153209285103, 0.00019691153209285103], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 2000/ 21553 | consumed samples: 512000 | consumed tokens: 1048576000 | elapsed time per iteration (s): 0.29 | learning rate: 1.969E-04 | global batch size: 256 | lm loss: 3.967605E+00 | grad norm: 0.463 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.829 | TFLOPs: 30.38 | +0: steps: 2000 loss: 3.9371 iter time (s): 0.302 samples/sec: 846.568 +7: ----------------------------------------------------------------------------------------------- +7: validation loss at iteration 2000 | lm loss value: 4.017185E+00 | lm loss PPL: 5.554454E+01 | +7: ----------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 2000 to checkpoints_146m14b100m +0: [2023-03-13 23:28:35,260] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step2000 is begin to save! +0: [2023-03-13 23:28:35,263] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_01-model_00-model_states.pt... 
+0: [2023-03-13 23:28:35,348] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_01-model_00-model_states.pt. +0: [2023-03-13 23:28:35,348] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_03-model_00-model_states.pt... +0: [2023-03-13 23:28:35,364] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_03-model_00-model_states.pt. +0: [2023-03-13 23:28:35,364] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_04-model_00-model_states.pt... +0: [2023-03-13 23:28:35,379] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_04-model_00-model_states.pt. +0: [2023-03-13 23:28:35,379] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_05-model_00-model_states.pt... +0: [2023-03-13 23:28:35,394] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_05-model_00-model_states.pt. +0: [2023-03-13 23:28:35,395] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_06-model_00-model_states.pt... +0: [2023-03-13 23:28:35,410] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_06-model_00-model_states.pt. +0: [2023-03-13 23:28:35,410] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_07-model_00-model_states.pt... +0: [2023-03-13 23:28:35,425] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_07-model_00-model_states.pt. +0: [2023-03-13 23:28:35,426] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_08-model_00-model_states.pt... 
+0: [2023-03-13 23:28:35,441] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_08-model_00-model_states.pt. +0: [2023-03-13 23:28:35,441] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_09-model_00-model_states.pt... +0: [2023-03-13 23:28:35,456] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_09-model_00-model_states.pt. +0: [2023-03-13 23:28:35,456] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_10-model_00-model_states.pt... +0: [2023-03-13 23:28:35,472] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_10-model_00-model_states.pt. +0: [2023-03-13 23:28:35,472] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_11-model_00-model_states.pt... +0: [2023-03-13 23:28:35,487] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_11-model_00-model_states.pt. +0: [2023-03-13 23:28:35,487] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_12-model_00-model_states.pt... +0: [2023-03-13 23:28:35,502] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_12-model_00-model_states.pt. +0: [2023-03-13 23:28:35,503] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_13-model_00-model_states.pt... +0: [2023-03-13 23:28:35,518] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_13-model_00-model_states.pt. +0: [2023-03-13 23:28:35,518] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_14-model_00-model_states.pt... 
+0: [2023-03-13 23:28:35,533] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_14-model_00-model_states.pt. +0: [2023-03-13 23:28:35,534] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_15-model_00-model_states.pt... +0: [2023-03-13 23:28:35,549] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_15-model_00-model_states.pt. +0: [2023-03-13 23:28:35,549] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_16-model_00-model_states.pt... +0: [2023-03-13 23:28:35,564] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_16-model_00-model_states.pt. +0: [2023-03-13 23:28:35,565] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_17-model_00-model_states.pt... +0: [2023-03-13 23:28:35,580] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_17-model_00-model_states.pt. +0: [2023-03-13 23:28:35,580] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/layer_19-model_00-model_states.pt... +0: [2023-03-13 23:28:35,581] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/layer_19-model_00-model_states.pt. +0: [2023-03-13 23:28:35,582] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step2000/mp_rank_00_model_states.pt +0: [2023-03-13 23:28:35,582] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/mp_rank_00_model_states.pt... +0: [2023-03-13 23:28:35,584] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/mp_rank_00_model_states.pt. 
+0: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... 
+4: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... 
+0: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... 
+1: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... 
+3: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:28:35,600] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... 
+1: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:28:35,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:28:35,632] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:28:35,632] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-13 23:28:35,632] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2023-03-13 23:28:35,636] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:28:35,636] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-13 23:28:35,636] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2023-03-13 23:28:35,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:28:35,637] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-13 23:28:35,637] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2023-03-13 23:28:35,638] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:28:35,638] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-13 23:28:35,638] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2023-03-13 23:28:35,636] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:28:35,636] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-13 23:28:35,636] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2023-03-13 23:28:35,636] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:28:35,636] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-13 23:28:35,636] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2023-03-13 23:28:35,638] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:28:35,638] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-13 23:28:35,639] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2023-03-13 23:28:35,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:28:35,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-13 23:28:35,640] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2023-03-13 23:28:35,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:28:35,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-13 23:28:35,640] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2023-03-13 23:28:35,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:28:35,641] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-13 23:28:35,641] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2023-03-13 23:28:35,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:28:35,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-13 23:28:35,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2023-03-13 23:28:35,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:28:35,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-13 23:28:35,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2023-03-13 23:28:35,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:28:35,643] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:28:35,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-13 23:28:35,643] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-13 23:28:35,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2023-03-13 23:28:35,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2023-03-13 23:28:35,644] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:28:35,644] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-13 23:28:35,644] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2023-03-13 23:28:35,645] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:28:35,645] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-13 23:28:35,645] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2023-03-13 23:28:35,635] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:28:35,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:28:35,638] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:28:35,635] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +4: [2023-03-13 23:28:35,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3: [2023-03-13 23:28:35,638] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +2: [2023-03-13 23:28:35,636] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2023-03-13 23:28:35,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2023-03-13 23:28:35,638] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2023-03-13 23:28:35,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:28:35,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:28:35,639] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. 
+2: [2023-03-13 23:28:35,637] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +4: [2023-03-13 23:28:35,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +3: [2023-03-13 23:28:35,639] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +0: [2023-03-13 23:28:35,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:28:35,637] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2023-03-13 23:28:35,643] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2023-03-13 23:28:35,640] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2023-03-13 23:28:35,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:28:35,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:28:35,642] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3: [2023-03-13 23:28:35,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +2: [2023-03-13 23:28:35,642] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! 
+3: [2023-03-13 23:28:35,640] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2023-03-13 23:28:35,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:28:35,640] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-13 23:28:35,640] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2023-03-13 23:28:35,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:28:35,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:28:35,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:28:35,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-13 23:28:35,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-13 23:28:35,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2023-03-13 23:28:35,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! 
+0: [2023-03-13 23:28:35,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-13 23:28:35,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2023-03-13 23:28:35,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:28:35,650] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:28:35,650] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-13 23:28:35,650] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2023-03-13 23:28:35,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:28:35,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-13 23:28:35,647] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2023-03-13 23:28:35,647] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-13 23:28:35,648] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2023-03-13 23:28:35,654] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. 
+1: [2023-03-13 23:28:35,654] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-13 23:28:35,654] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2023-03-13 23:28:35,655] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:28:35,655] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-13 23:28:35,656] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2023-03-13 23:28:35,661] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:28:35,660] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:28:35,661] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +7: [2023-03-13 23:28:35,660] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +1: [2023-03-13 23:28:35,661] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2023-03-13 23:28:35,661] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +7: [2023-03-13 23:28:35,661] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:28:35,661] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-13 23:28:35,661] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2023-03-13 23:28:35,662] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:28:35,662] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-13 23:28:35,662] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2023-03-13 23:28:35,663] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-13 23:28:35,663] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2023-03-13 23:28:35,664] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:28:35,664] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-13 23:28:35,664] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2023-03-13 23:28:35,659] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:28:35,665] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. 
+0: [2023-03-13 23:28:35,665] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-13 23:28:35,665] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2023-03-13 23:28:35,665] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:28:35,665] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-13 23:28:35,665] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2023-03-13 23:28:35,659] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +6: [2023-03-13 23:28:35,665] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:28:35,659] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2023-03-13 23:28:35,665] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-13 23:28:35,666] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2023-03-13 23:28:35,666] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:28:35,666] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-13 23:28:35,666] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2023-03-13 23:28:35,666] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:28:35,666] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-13 23:28:35,666] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2023-03-13 23:28:35,666] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:28:35,666] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-13 23:28:35,666] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +4: [2023-03-13 23:28:35,668] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:28:35,668] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-13 23:28:35,668] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2023-03-13 23:28:35,668] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:28:35,668] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-13 23:28:35,668] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2023-03-13 23:28:35,668] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:28:35,668] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-13 23:28:35,668] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2023-03-13 23:28:35,669] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:28:35,669] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-13 23:28:35,669] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2023-03-13 23:28:35,669] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:28:35,670] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-13 23:28:35,670] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2023-03-13 23:28:35,671] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. 
+1: [2023-03-13 23:28:35,671] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-13 23:28:35,671] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +6: [2023-03-13 23:28:35,671] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:28:35,671] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-13 23:28:35,671] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2023-03-13 23:28:35,673] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:28:35,673] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-13 23:28:35,673] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +5: [2023-03-13 23:28:35,673] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:28:35,674] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-13 23:28:35,674] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2023-03-13 23:28:35,674] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. 
+3: [2023-03-13 23:28:35,674] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:28:35,674] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3: [2023-03-13 23:28:35,674] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +2: [2023-03-13 23:28:35,674] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2023-03-13 23:28:35,674] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2023-03-13 23:28:35,674] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:28:35,674] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-13 23:28:35,674] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +1: [2023-03-13 23:28:35,674] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:28:35,674] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-13 23:28:35,674] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +2: [2023-03-13 23:28:35,679] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. 
+2: [2023-03-13 23:28:35,679] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2023-03-13 23:28:35,679] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2023-03-13 23:28:35,679] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:28:35,679] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-13 23:28:35,679] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +3: [2023-03-13 23:28:35,680] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:28:35,680] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2023-03-13 23:28:35,680] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2023-03-13 23:28:35,692] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:28:35,692] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-13 23:28:35,692] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: [2023-03-13 23:28:35,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. 
+0: [2023-03-13 23:28:35,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step2000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-13 23:28:35,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step2000 is ready now! +0: successfully saved checkpoint at iteration 2000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 449.81 +7: iteration 2010/ 21553 | consumed samples: 514560 | consumed tokens: 1053818880 | elapsed time per iteration (s): 0.35 | learning rate: 1.969E-04 | global batch size: 256 | lm loss: 3.984137E+00 | grad norm: 0.581 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 729.796 | TFLOPs: 25.55 | +7: iteration 2020/ 21553 | consumed samples: 517120 | consumed tokens: 1059061760 | elapsed time per iteration (s): 0.30 | learning rate: 1.968E-04 | global batch size: 256 | lm loss: 3.976977E+00 | grad norm: 0.645 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.455 | TFLOPs: 30.37 | +7: iteration 2030/ 21553 | consumed samples: 519680 | consumed tokens: 1064304640 | elapsed time per iteration (s): 0.29 | learning rate: 1.968E-04 | global batch size: 256 | lm loss: 3.984406E+00 | grad norm: 0.603 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.013 | TFLOPs: 30.39 | +7: iteration 2040/ 21553 | consumed samples: 522240 | consumed tokens: 1069547520 | elapsed time per iteration (s): 0.30 | learning rate: 1.968E-04 | global batch size: 256 | lm loss: 3.972224E+00 | grad norm: 0.614 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.294 | TFLOPs: 30.36 | +7: iteration 2050/ 21553 | consumed samples: 524800 | consumed tokens: 1074790400 | elapsed time per iteration (s): 0.30 | learning rate: 1.967E-04 | global batch size: 256 | lm 
loss: 3.985341E+00 | grad norm: 0.682 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.682 | TFLOPs: 30.38 | +7: iteration 2060/ 21553 | consumed samples: 527360 | consumed tokens: 1080033280 | elapsed time per iteration (s): 0.29 | learning rate: 1.967E-04 | global batch size: 256 | lm loss: 3.957002E+00 | grad norm: 0.499 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.926 | TFLOPs: 30.38 | +7: iteration 2070/ 21553 | consumed samples: 529920 | consumed tokens: 1085276160 | elapsed time per iteration (s): 0.30 | learning rate: 1.967E-04 | global batch size: 256 | lm loss: 3.966101E+00 | grad norm: 0.511 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.789 | TFLOPs: 30.38 | +7: iteration 2080/ 21553 | consumed samples: 532480 | consumed tokens: 1090519040 | elapsed time per iteration (s): 0.30 | learning rate: 1.966E-04 | global batch size: 256 | lm loss: 3.957634E+00 | grad norm: 0.583 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.787 | TFLOPs: 30.38 | +7: iteration 2090/ 21553 | consumed samples: 535040 | consumed tokens: 1095761920 | elapsed time per iteration (s): 0.30 | learning rate: 1.966E-04 | global batch size: 256 | lm loss: 3.950177E+00 | grad norm: 0.488 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.411 | TFLOPs: 30.37 | +7: iteration 2100/ 21553 | consumed samples: 537600 | consumed tokens: 1101004800 | elapsed time per iteration (s): 0.29 | learning rate: 1.966E-04 | global batch size: 256 | lm loss: 3.959805E+00 | grad norm: 0.512 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.807 | TFLOPs: 30.38 | +7: iteration 2110/ 21553 | consumed samples: 540160 | consumed tokens: 1106247680 | elapsed time per 
iteration (s): 0.30 | learning rate: 1.965E-04 | global batch size: 256 | lm loss: 3.952080E+00 | grad norm: 0.540 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.571 | TFLOPs: 30.34 | +7: iteration 2120/ 21553 | consumed samples: 542720 | consumed tokens: 1111490560 | elapsed time per iteration (s): 0.30 | learning rate: 1.965E-04 | global batch size: 256 | lm loss: 3.956501E+00 | grad norm: 0.606 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.849 | TFLOPs: 30.35 | +7: iteration 2130/ 21553 | consumed samples: 545280 | consumed tokens: 1116733440 | elapsed time per iteration (s): 0.30 | learning rate: 1.964E-04 | global batch size: 256 | lm loss: 3.948131E+00 | grad norm: 0.465 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.678 | TFLOPs: 30.27 | +7: iteration 2140/ 21553 | consumed samples: 547840 | consumed tokens: 1121976320 | elapsed time per iteration (s): 0.29 | learning rate: 1.964E-04 | global batch size: 256 | lm loss: 3.945877E+00 | grad norm: 0.535 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.133 | TFLOPs: 30.39 | +7: iteration 2150/ 21553 | consumed samples: 550400 | consumed tokens: 1127219200 | elapsed time per iteration (s): 0.29 | learning rate: 1.964E-04 | global batch size: 256 | lm loss: 3.933694E+00 | grad norm: 0.494 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.619 | TFLOPs: 30.41 | +7: iteration 2160/ 21553 | consumed samples: 552960 | consumed tokens: 1132462080 | elapsed time per iteration (s): 0.30 | learning rate: 1.963E-04 | global batch size: 256 | lm loss: 3.937864E+00 | grad norm: 0.700 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.964 | TFLOPs: 30.35 | +7: iteration 2170/ 21553 
| consumed samples: 555520 | consumed tokens: 1137704960 | elapsed time per iteration (s): 0.29 | learning rate: 1.963E-04 | global batch size: 256 | lm loss: 3.936363E+00 | grad norm: 0.559 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.305 | TFLOPs: 30.40 | +7: iteration 2180/ 21553 | consumed samples: 558080 | consumed tokens: 1142947840 | elapsed time per iteration (s): 0.30 | learning rate: 1.963E-04 | global batch size: 256 | lm loss: 3.935008E+00 | grad norm: 0.529 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.717 | TFLOPs: 30.38 | +7: iteration 2190/ 21553 | consumed samples: 560640 | consumed tokens: 1148190720 | elapsed time per iteration (s): 0.30 | learning rate: 1.962E-04 | global batch size: 256 | lm loss: 3.933493E+00 | grad norm: 0.474 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.333 | TFLOPs: 30.36 | +7: iteration 2200/ 21553 | consumed samples: 563200 | consumed tokens: 1153433600 | elapsed time per iteration (s): 0.30 | learning rate: 1.962E-04 | global batch size: 256 | lm loss: 3.928037E+00 | grad norm: 0.671 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.164 | TFLOPs: 30.36 | +7: iteration 2210/ 21553 | consumed samples: 565760 | consumed tokens: 1158676480 | elapsed time per iteration (s): 0.30 | learning rate: 1.961E-04 | global batch size: 256 | lm loss: 3.929472E+00 | grad norm: 0.546 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.752 | TFLOPs: 30.27 | +7: iteration 2220/ 21553 | consumed samples: 568320 | consumed tokens: 1163919360 | elapsed time per iteration (s): 0.30 | learning rate: 1.961E-04 | global batch size: 256 | lm loss: 3.934417E+00 | grad norm: 0.578 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 
0 | samples per second: 866.879 | TFLOPs: 30.35 | +7: iteration 2230/ 21553 | consumed samples: 570880 | consumed tokens: 1169162240 | elapsed time per iteration (s): 0.30 | learning rate: 1.961E-04 | global batch size: 256 | lm loss: 3.930299E+00 | grad norm: 0.536 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.043 | TFLOPs: 30.32 | +7: iteration 2240/ 21553 | consumed samples: 573440 | consumed tokens: 1174405120 | elapsed time per iteration (s): 0.30 | learning rate: 1.960E-04 | global batch size: 256 | lm loss: 3.915599E+00 | grad norm: 0.519 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.706 | TFLOPs: 30.34 | +7: iteration 2250/ 21553 | consumed samples: 576000 | consumed tokens: 1179648000 | elapsed time per iteration (s): 0.30 | learning rate: 1.960E-04 | global batch size: 256 | lm loss: 3.937119E+00 | grad norm: 0.551 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.945 | TFLOPs: 30.35 | +7: iteration 2260/ 21553 | consumed samples: 578560 | consumed tokens: 1184890880 | elapsed time per iteration (s): 0.30 | learning rate: 1.960E-04 | global batch size: 256 | lm loss: 3.908054E+00 | grad norm: 0.827 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.222 | TFLOPs: 30.36 | +7: iteration 2270/ 21553 | consumed samples: 581120 | consumed tokens: 1190133760 | elapsed time per iteration (s): 0.30 | learning rate: 1.959E-04 | global batch size: 256 | lm loss: 3.915228E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.114 | TFLOPs: 30.36 | +7: iteration 2280/ 21553 | consumed samples: 583680 | consumed tokens: 1195376640 | elapsed time per iteration (s): 0.30 | learning rate: 1.959E-04 | global batch size: 256 | lm loss: 3.898435E+00 | grad norm: 0.481 | 
num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.276 | TFLOPs: 30.36 | +7: iteration 2290/ 21553 | consumed samples: 586240 | consumed tokens: 1200619520 | elapsed time per iteration (s): 0.30 | learning rate: 1.958E-04 | global batch size: 256 | lm loss: 3.908500E+00 | grad norm: 0.482 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.173 | TFLOPs: 30.29 | +7: iteration 2300/ 21553 | consumed samples: 588800 | consumed tokens: 1205862400 | elapsed time per iteration (s): 0.30 | learning rate: 1.958E-04 | global batch size: 256 | lm loss: 3.913681E+00 | grad norm: 0.579 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.457 | TFLOPs: 30.26 | +7: iteration 2310/ 21553 | consumed samples: 591360 | consumed tokens: 1211105280 | elapsed time per iteration (s): 0.30 | learning rate: 1.958E-04 | global batch size: 256 | lm loss: 3.911969E+00 | grad norm: 0.540 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.068 | TFLOPs: 30.35 | +7: iteration 2320/ 21553 | consumed samples: 593920 | consumed tokens: 1216348160 | elapsed time per iteration (s): 0.30 | learning rate: 1.957E-04 | global batch size: 256 | lm loss: 3.898923E+00 | grad norm: 0.552 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.492 | TFLOPs: 30.37 | +7: iteration 2330/ 21553 | consumed samples: 596480 | consumed tokens: 1221591040 | elapsed time per iteration (s): 0.30 | learning rate: 1.957E-04 | global batch size: 256 | lm loss: 3.896117E+00 | grad norm: 0.530 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.535 | TFLOPs: 30.37 | +7: iteration 2340/ 21553 | consumed samples: 599040 | consumed tokens: 1226833920 | elapsed time per iteration (s): 0.30 | learning rate: 
1.956E-04 | global batch size: 256 | lm loss: 3.878368E+00 | grad norm: 0.564 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.267 | TFLOPs: 30.36 | +7: iteration 2350/ 21553 | consumed samples: 601600 | consumed tokens: 1232076800 | elapsed time per iteration (s): 0.30 | learning rate: 1.956E-04 | global batch size: 256 | lm loss: 3.900122E+00 | grad norm: 0.582 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.774 | TFLOPs: 30.38 | +7: iteration 2360/ 21553 | consumed samples: 604160 | consumed tokens: 1237319680 | elapsed time per iteration (s): 0.30 | learning rate: 1.956E-04 | global batch size: 256 | lm loss: 3.891970E+00 | grad norm: 0.494 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.320 | TFLOPs: 30.36 | +7: iteration 2370/ 21553 | consumed samples: 606720 | consumed tokens: 1242562560 | elapsed time per iteration (s): 0.30 | learning rate: 1.955E-04 | global batch size: 256 | lm loss: 3.877228E+00 | grad norm: 0.554 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.004 | TFLOPs: 30.35 | +7: iteration 2380/ 21553 | consumed samples: 609280 | consumed tokens: 1247805440 | elapsed time per iteration (s): 0.30 | learning rate: 1.955E-04 | global batch size: 256 | lm loss: 3.869791E+00 | grad norm: 0.584 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.470 | TFLOPs: 30.33 | +7: iteration 2390/ 21553 | consumed samples: 611840 | consumed tokens: 1253048320 | elapsed time per iteration (s): 0.30 | learning rate: 1.954E-04 | global batch size: 256 | lm loss: 3.879839E+00 | grad norm: 0.511 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.242 | TFLOPs: 30.29 | +7: iteration 2400/ 21553 | consumed samples: 614400 | 
consumed tokens: 1258291200 | elapsed time per iteration (s): 0.30 | learning rate: 1.954E-04 | global batch size: 256 | lm loss: 3.873315E+00 | grad norm: 0.504 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.493 | TFLOPs: 30.37 | +7: iteration 2410/ 21553 | consumed samples: 616960 | consumed tokens: 1263534080 | elapsed time per iteration (s): 0.30 | learning rate: 1.953E-04 | global batch size: 256 | lm loss: 3.870964E+00 | grad norm: 0.464 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.273 | TFLOPs: 30.36 | +7: iteration 2420/ 21553 | consumed samples: 619520 | consumed tokens: 1268776960 | elapsed time per iteration (s): 0.30 | learning rate: 1.953E-04 | global batch size: 256 | lm loss: 3.874531E+00 | grad norm: 0.562 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.654 | TFLOPs: 30.37 | +7: iteration 2430/ 21553 | consumed samples: 622080 | consumed tokens: 1274019840 | elapsed time per iteration (s): 0.30 | learning rate: 1.953E-04 | global batch size: 256 | lm loss: 3.867783E+00 | grad norm: 0.585 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.481 | TFLOPs: 30.37 | +7: iteration 2440/ 21553 | consumed samples: 624640 | consumed tokens: 1279262720 | elapsed time per iteration (s): 0.30 | learning rate: 1.952E-04 | global batch size: 256 | lm loss: 3.858185E+00 | grad norm: 0.510 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.958 | TFLOPs: 30.35 | +7: iteration 2450/ 21553 | consumed samples: 627200 | consumed tokens: 1284505600 | elapsed time per iteration (s): 0.29 | learning rate: 1.952E-04 | global batch size: 256 | lm loss: 3.863815E+00 | grad norm: 0.523 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 
867.816 | TFLOPs: 30.38 | +7: iteration 2460/ 21553 | consumed samples: 629760 | consumed tokens: 1289748480 | elapsed time per iteration (s): 0.30 | learning rate: 1.951E-04 | global batch size: 256 | lm loss: 3.865389E+00 | grad norm: 0.465 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.435 | TFLOPs: 30.37 | +7: iteration 2470/ 21553 | consumed samples: 632320 | consumed tokens: 1294991360 | elapsed time per iteration (s): 0.30 | learning rate: 1.951E-04 | global batch size: 256 | lm loss: 3.860489E+00 | grad norm: 0.553 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.738 | TFLOPs: 30.38 | +7: iteration 2480/ 21553 | consumed samples: 634880 | consumed tokens: 1300234240 | elapsed time per iteration (s): 0.30 | learning rate: 1.950E-04 | global batch size: 256 | lm loss: 3.856812E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.334 | TFLOPs: 30.36 | +7: iteration 2490/ 21553 | consumed samples: 637440 | consumed tokens: 1305477120 | elapsed time per iteration (s): 0.30 | learning rate: 1.950E-04 | global batch size: 256 | lm loss: 3.858611E+00 | grad norm: 0.674 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.068 | TFLOPs: 30.35 | +7: iteration 2500/ 21553 | consumed samples: 640000 | consumed tokens: 1310720000 | elapsed time per iteration (s): 0.30 | learning rate: 1.950E-04 | global batch size: 256 | lm loss: 3.837459E+00 | grad norm: 0.586 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.408 | TFLOPs: 30.37 | +7: iteration 2510/ 21553 | consumed samples: 642560 | consumed tokens: 1315962880 | elapsed time per iteration (s): 0.30 | learning rate: 1.949E-04 | global batch size: 256 | lm loss: 3.846861E+00 | grad norm: 0.541 | num zeros: 0.0 | number 
of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.786 | TFLOPs: 30.38 | +7: iteration 2520/ 21553 | consumed samples: 645120 | consumed tokens: 1321205760 | elapsed time per iteration (s): 0.29 | learning rate: 1.949E-04 | global batch size: 256 | lm loss: 3.843253E+00 | grad norm: 0.461 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.800 | TFLOPs: 30.38 | +7: iteration 2530/ 21553 | consumed samples: 647680 | consumed tokens: 1326448640 | elapsed time per iteration (s): 0.30 | learning rate: 1.948E-04 | global batch size: 256 | lm loss: 3.845337E+00 | grad norm: 0.478 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.592 | TFLOPs: 30.37 | +7: iteration 2540/ 21553 | consumed samples: 650240 | consumed tokens: 1331691520 | elapsed time per iteration (s): 0.30 | learning rate: 1.948E-04 | global batch size: 256 | lm loss: 3.841733E+00 | grad norm: 0.561 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.944 | TFLOPs: 30.35 | +7: iteration 2550/ 21553 | consumed samples: 652800 | consumed tokens: 1336934400 | elapsed time per iteration (s): 0.30 | learning rate: 1.947E-04 | global batch size: 256 | lm loss: 3.856787E+00 | grad norm: 0.596 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.537 | TFLOPs: 30.37 | +7: iteration 2560/ 21553 | consumed samples: 655360 | consumed tokens: 1342177280 | elapsed time per iteration (s): 0.30 | learning rate: 1.947E-04 | global batch size: 256 | lm loss: 3.847873E+00 | grad norm: 0.486 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.254 | TFLOPs: 30.29 | +7: iteration 2570/ 21553 | consumed samples: 657920 | consumed tokens: 1347420160 | elapsed time per iteration (s): 0.30 | learning rate: 1.946E-04 | global batch 
size: 256 | lm loss: 3.836088E+00 | grad norm: 0.496 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.634 | TFLOPs: 30.27 | +7: iteration 2580/ 21553 | consumed samples: 660480 | consumed tokens: 1352663040 | elapsed time per iteration (s): 0.30 | learning rate: 1.946E-04 | global batch size: 256 | lm loss: 3.842388E+00 | grad norm: 0.488 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.090 | TFLOPs: 30.28 | +7: iteration 2590/ 21553 | consumed samples: 663040 | consumed tokens: 1357905920 | elapsed time per iteration (s): 0.30 | learning rate: 1.946E-04 | global batch size: 256 | lm loss: 3.823383E+00 | grad norm: 0.589 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.422 | TFLOPs: 30.26 | +7: iteration 2600/ 21553 | consumed samples: 665600 | consumed tokens: 1363148800 | elapsed time per iteration (s): 0.30 | learning rate: 1.945E-04 | global batch size: 256 | lm loss: 3.821144E+00 | grad norm: 0.499 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.763 | TFLOPs: 30.31 | +7: iteration 2610/ 21553 | consumed samples: 668160 | consumed tokens: 1368391680 | elapsed time per iteration (s): 0.30 | learning rate: 1.945E-04 | global batch size: 256 | lm loss: 3.820165E+00 | grad norm: 0.463 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.432 | TFLOPs: 30.30 | +7: iteration 2620/ 21553 | consumed samples: 670720 | consumed tokens: 1373634560 | elapsed time per iteration (s): 0.30 | learning rate: 1.944E-04 | global batch size: 256 | lm loss: 3.822075E+00 | grad norm: 0.557 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.099 | TFLOPs: 30.35 | +7: iteration 2630/ 21553 | consumed samples: 673280 | consumed tokens: 1378877440 | 
elapsed time per iteration (s): 0.30 | learning rate: 1.944E-04 | global batch size: 256 | lm loss: 3.820847E+00 | grad norm: 0.497 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.581 | TFLOPs: 30.37 | +7: iteration 2640/ 21553 | consumed samples: 675840 | consumed tokens: 1384120320 | elapsed time per iteration (s): 0.30 | learning rate: 1.943E-04 | global batch size: 256 | lm loss: 3.824648E+00 | grad norm: 0.560 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.746 | TFLOPs: 30.38 | +7: iteration 2650/ 21553 | consumed samples: 678400 | consumed tokens: 1389363200 | elapsed time per iteration (s): 0.29 | learning rate: 1.943E-04 | global batch size: 256 | lm loss: 3.810417E+00 | grad norm: 0.456 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.376 | TFLOPs: 30.40 | +7: iteration 2660/ 21553 | consumed samples: 680960 | consumed tokens: 1394606080 | elapsed time per iteration (s): 0.29 | learning rate: 1.942E-04 | global batch size: 256 | lm loss: 3.809052E+00 | grad norm: 0.555 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.702 | TFLOPs: 30.41 | +7: iteration 2670/ 21553 | consumed samples: 683520 | consumed tokens: 1399848960 | elapsed time per iteration (s): 0.29 | learning rate: 1.942E-04 | global batch size: 256 | lm loss: 3.813150E+00 | grad norm: 0.507 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.579 | TFLOPs: 30.41 | +7: iteration 2680/ 21553 | consumed samples: 686080 | consumed tokens: 1405091840 | elapsed time per iteration (s): 0.29 | learning rate: 1.941E-04 | global batch size: 256 | lm loss: 3.822684E+00 | grad norm: 0.561 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.403 | TFLOPs: 30.40 | +7: 
iteration 2690/ 21553 | consumed samples: 688640 | consumed tokens: 1410334720 | elapsed time per iteration (s): 0.29 | learning rate: 1.941E-04 | global batch size: 256 | lm loss: 3.819854E+00 | grad norm: 0.466 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.225 | TFLOPs: 30.39 | +7: iteration 2700/ 21553 | consumed samples: 691200 | consumed tokens: 1415577600 | elapsed time per iteration (s): 0.29 | learning rate: 1.940E-04 | global batch size: 256 | lm loss: 3.808660E+00 | grad norm: 0.504 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.158 | TFLOPs: 30.39 | +7: iteration 2710/ 21553 | consumed samples: 693760 | consumed tokens: 1420820480 | elapsed time per iteration (s): 0.29 | learning rate: 1.940E-04 | global batch size: 256 | lm loss: 3.802417E+00 | grad norm: 0.551 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.928 | TFLOPs: 30.38 | +7: iteration 2720/ 21553 | consumed samples: 696320 | consumed tokens: 1426063360 | elapsed time per iteration (s): 0.30 | learning rate: 1.940E-04 | global batch size: 256 | lm loss: 3.806718E+00 | grad norm: 0.505 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.533 | TFLOPs: 30.37 | +7: iteration 2730/ 21553 | consumed samples: 698880 | consumed tokens: 1431306240 | elapsed time per iteration (s): 0.30 | learning rate: 1.939E-04 | global batch size: 256 | lm loss: 3.803021E+00 | grad norm: 0.467 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.907 | TFLOPs: 30.24 | +7: iteration 2740/ 21553 | consumed samples: 701440 | consumed tokens: 1436549120 | elapsed time per iteration (s): 0.30 | learning rate: 1.939E-04 | global batch size: 256 | lm loss: 3.812010E+00 | grad norm: 0.509 | num zeros: 0.0 | number of skipped iterations: 0 | 
number of nan iterations: 0 | samples per second: 864.384 | TFLOPs: 30.26 | +7: iteration 2750/ 21553 | consumed samples: 704000 | consumed tokens: 1441792000 | elapsed time per iteration (s): 0.30 | learning rate: 1.938E-04 | global batch size: 256 | lm loss: 3.807826E+00 | grad norm: 0.496 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.533 | TFLOPs: 30.26 | +7: iteration 2760/ 21553 | consumed samples: 706560 | consumed tokens: 1447034880 | elapsed time per iteration (s): 0.30 | learning rate: 1.938E-04 | global batch size: 256 | lm loss: 3.795914E+00 | grad norm: 0.547 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.155 | TFLOPs: 30.25 | +7: iteration 2770/ 21553 | consumed samples: 709120 | consumed tokens: 1452277760 | elapsed time per iteration (s): 0.30 | learning rate: 1.937E-04 | global batch size: 256 | lm loss: 3.798462E+00 | grad norm: 0.634 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.049 | TFLOPs: 30.28 | +7: iteration 2780/ 21553 | consumed samples: 711680 | consumed tokens: 1457520640 | elapsed time per iteration (s): 0.30 | learning rate: 1.937E-04 | global batch size: 256 | lm loss: 3.813933E+00 | grad norm: 0.544 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.146 | TFLOPs: 30.25 | +7: iteration 2790/ 21553 | consumed samples: 714240 | consumed tokens: 1462763520 | elapsed time per iteration (s): 0.30 | learning rate: 1.936E-04 | global batch size: 256 | lm loss: 3.779525E+00 | grad norm: 0.557 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.166 | TFLOPs: 30.36 | +7: iteration 2800/ 21553 | consumed samples: 716800 | consumed tokens: 1468006400 | elapsed time per iteration (s): 0.30 | learning rate: 1.936E-04 | global batch size: 256 | lm loss: 
3.793737E+00 | grad norm: 0.507 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.429 | TFLOPs: 30.37 | +7: iteration 2810/ 21553 | consumed samples: 719360 | consumed tokens: 1473249280 | elapsed time per iteration (s): 0.29 | learning rate: 1.935E-04 | global batch size: 256 | lm loss: 3.791569E+00 | grad norm: 0.553 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.823 | TFLOPs: 30.38 | +7: iteration 2820/ 21553 | consumed samples: 721920 | consumed tokens: 1478492160 | elapsed time per iteration (s): 0.29 | learning rate: 1.935E-04 | global batch size: 256 | lm loss: 3.784578E+00 | grad norm: 0.509 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.480 | TFLOPs: 30.40 | +7: iteration 2830/ 21553 | consumed samples: 724480 | consumed tokens: 1483735040 | elapsed time per iteration (s): 0.30 | learning rate: 1.934E-04 | global batch size: 256 | lm loss: 3.772977E+00 | grad norm: 0.532 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.823 | TFLOPs: 30.28 | +7: iteration 2840/ 21553 | consumed samples: 727040 | consumed tokens: 1488977920 | elapsed time per iteration (s): 0.30 | learning rate: 1.934E-04 | global batch size: 256 | lm loss: 3.789830E+00 | grad norm: 0.513 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.901 | TFLOPs: 30.31 | +7: iteration 2850/ 21553 | consumed samples: 729600 | consumed tokens: 1494220800 | elapsed time per iteration (s): 0.29 | learning rate: 1.933E-04 | global batch size: 256 | lm loss: 3.772165E+00 | grad norm: 0.601 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.230 | TFLOPs: 30.39 | +7: iteration 2860/ 21553 | consumed samples: 732160 | consumed tokens: 1499463680 | elapsed time per 
iteration (s): 0.29 | learning rate: 1.933E-04 | global batch size: 256 | lm loss: 3.779284E+00 | grad norm: 0.468 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.207 | TFLOPs: 30.39 | +7: iteration 2870/ 21553 | consumed samples: 734720 | consumed tokens: 1504706560 | elapsed time per iteration (s): 0.30 | learning rate: 1.932E-04 | global batch size: 256 | lm loss: 3.765446E+00 | grad norm: 0.509 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.609 | TFLOPs: 30.37 | +7: iteration 2880/ 21553 | consumed samples: 737280 | consumed tokens: 1509949440 | elapsed time per iteration (s): 0.29 | learning rate: 1.932E-04 | global batch size: 256 | lm loss: 3.765057E+00 | grad norm: 0.598 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.964 | TFLOPs: 30.39 | +7: iteration 2890/ 21553 | consumed samples: 739840 | consumed tokens: 1515192320 | elapsed time per iteration (s): 0.30 | learning rate: 1.931E-04 | global batch size: 256 | lm loss: 3.769784E+00 | grad norm: 0.549 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.234 | TFLOPs: 30.36 | +7: iteration 2900/ 21553 | consumed samples: 742400 | consumed tokens: 1520435200 | elapsed time per iteration (s): 0.30 | learning rate: 1.931E-04 | global batch size: 256 | lm loss: 3.754916E+00 | grad norm: 0.513 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.517 | TFLOPs: 30.37 | +7: iteration 2910/ 21553 | consumed samples: 744960 | consumed tokens: 1525678080 | elapsed time per iteration (s): 0.30 | learning rate: 1.930E-04 | global batch size: 256 | lm loss: 3.759314E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.612 | TFLOPs: 30.37 | +7: iteration 2920/ 21553 
| consumed samples: 747520 | consumed tokens: 1530920960 | elapsed time per iteration (s): 0.30 | learning rate: 1.930E-04 | global batch size: 256 | lm loss: 3.769167E+00 | grad norm: 0.510 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.079 | TFLOPs: 30.35 | +7: iteration 2930/ 21553 | consumed samples: 750080 | consumed tokens: 1536163840 | elapsed time per iteration (s): 0.29 | learning rate: 1.929E-04 | global batch size: 256 | lm loss: 3.764174E+00 | grad norm: 0.547 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.861 | TFLOPs: 30.38 | +7: iteration 2940/ 21553 | consumed samples: 752640 | consumed tokens: 1541406720 | elapsed time per iteration (s): 0.30 | learning rate: 1.929E-04 | global batch size: 256 | lm loss: 3.761726E+00 | grad norm: 0.506 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.746 | TFLOPs: 30.38 | +7: iteration 2950/ 21553 | consumed samples: 755200 | consumed tokens: 1546649600 | elapsed time per iteration (s): 0.30 | learning rate: 1.928E-04 | global batch size: 256 | lm loss: 3.763166E+00 | grad norm: 0.469 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.069 | TFLOPs: 30.35 | +7: iteration 2960/ 21553 | consumed samples: 757760 | consumed tokens: 1551892480 | elapsed time per iteration (s): 0.30 | learning rate: 1.928E-04 | global batch size: 256 | lm loss: 3.771272E+00 | grad norm: 0.497 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.166 | TFLOPs: 30.36 | +7: iteration 2970/ 21553 | consumed samples: 760320 | consumed tokens: 1557135360 | elapsed time per iteration (s): 0.29 | learning rate: 1.927E-04 | global batch size: 256 | lm loss: 3.765143E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 
0 | samples per second: 868.132 | TFLOPs: 30.39 | +7: iteration 2980/ 21553 | consumed samples: 762880 | consumed tokens: 1562378240 | elapsed time per iteration (s): 0.29 | learning rate: 1.926E-04 | global batch size: 256 | lm loss: 3.744521E+00 | grad norm: 0.513 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.842 | TFLOPs: 30.38 | +7: iteration 2990/ 21553 | consumed samples: 765440 | consumed tokens: 1567621120 | elapsed time per iteration (s): 0.29 | learning rate: 1.926E-04 | global batch size: 256 | lm loss: 3.752171E+00 | grad norm: 0.487 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.048 | TFLOPs: 30.39 | +7: iteration 3000/ 21553 | consumed samples: 768000 | consumed tokens: 1572864000 | elapsed time per iteration (s): 0.29 | learning rate: 1.925E-04 | global batch size: 256 | lm loss: 3.758910E+00 | grad norm: 0.634 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.888 | TFLOPs: 30.38 | +7: ----------------------------------------------------------------------------------------------- +7: validation loss at iteration 3000 | lm loss value: 3.854548E+00 | lm loss PPL: 4.720725E+01 | +7: ----------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 3000 to checkpoints_146m14b100m +0: [2023-03-13 23:33:31,058] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step3000 is begin to save! +0: [2023-03-13 23:33:31,061] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_01-model_00-model_states.pt... +0: [2023-03-13 23:33:31,145] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_01-model_00-model_states.pt. 
+0: [2023-03-13 23:33:31,145] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_03-model_00-model_states.pt... +0: [2023-03-13 23:33:31,161] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_03-model_00-model_states.pt. +0: [2023-03-13 23:33:31,161] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_04-model_00-model_states.pt... +0: [2023-03-13 23:33:31,175] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_04-model_00-model_states.pt. +0: [2023-03-13 23:33:31,176] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_05-model_00-model_states.pt... +0: [2023-03-13 23:33:31,190] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_05-model_00-model_states.pt. +0: [2023-03-13 23:33:31,191] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_06-model_00-model_states.pt... +0: [2023-03-13 23:33:31,205] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_06-model_00-model_states.pt. +0: [2023-03-13 23:33:31,206] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_07-model_00-model_states.pt... +0: [2023-03-13 23:33:31,220] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_07-model_00-model_states.pt. +0: [2023-03-13 23:33:31,221] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_08-model_00-model_states.pt... +0: [2023-03-13 23:33:31,235] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_08-model_00-model_states.pt. 
+0: [2023-03-13 23:33:31,236] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_09-model_00-model_states.pt... +0: [2023-03-13 23:33:31,250] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_09-model_00-model_states.pt. +0: [2023-03-13 23:33:31,251] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_10-model_00-model_states.pt... +0: [2023-03-13 23:33:31,265] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_10-model_00-model_states.pt. +0: [2023-03-13 23:33:31,266] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_11-model_00-model_states.pt... +0: [2023-03-13 23:33:31,281] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_11-model_00-model_states.pt. +0: [2023-03-13 23:33:31,281] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_12-model_00-model_states.pt... +0: [2023-03-13 23:33:31,296] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_12-model_00-model_states.pt. +0: [2023-03-13 23:33:31,296] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_13-model_00-model_states.pt... +0: [2023-03-13 23:33:31,311] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_13-model_00-model_states.pt. +0: [2023-03-13 23:33:31,311] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_14-model_00-model_states.pt... +0: [2023-03-13 23:33:31,326] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_14-model_00-model_states.pt. 
+0: [2023-03-13 23:33:31,326] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_15-model_00-model_states.pt... +0: [2023-03-13 23:33:31,341] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_15-model_00-model_states.pt. +0: [2023-03-13 23:33:31,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_16-model_00-model_states.pt... +0: [2023-03-13 23:33:31,356] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_16-model_00-model_states.pt. +0: [2023-03-13 23:33:31,356] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_17-model_00-model_states.pt... +0: [2023-03-13 23:33:31,371] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_17-model_00-model_states.pt. +0: [2023-03-13 23:33:31,371] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/layer_19-model_00-model_states.pt... +0: [2023-03-13 23:33:31,372] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/layer_19-model_00-model_states.pt. +0: [2023-03-13 23:33:31,373] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step3000/mp_rank_00_model_states.pt +0: [2023-03-13 23:33:31,373] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/mp_rank_00_model_states.pt... +0: [2023-03-13 23:33:31,375] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/mp_rank_00_model_states.pt. +0: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... 
+0: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... 
+4: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... 
+6: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... 
+3: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... 
+2: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... 
+5: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:33:31,391] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:33:31,421] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:33:31,421] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-13 23:33:31,421] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! 
+7: [2023-03-13 23:33:31,423] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:33:31,423] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-13 23:33:31,423] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2023-03-13 23:33:31,425] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:33:31,425] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-13 23:33:31,425] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2023-03-13 23:33:31,427] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:33:31,427] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:33:31,427] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +5: [2023-03-13 23:33:31,427] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +7: [2023-03-13 23:33:31,427] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2023-03-13 23:33:31,427] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! 
+2: [2023-03-13 23:33:31,426] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:33:31,426] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-13 23:33:31,426] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2023-03-13 23:33:31,427] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:33:31,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-13 23:33:31,428] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2023-03-13 23:33:31,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:33:31,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2023-03-13 23:33:31,428] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2023-03-13 23:33:31,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:33:31,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. 
+6: [2023-03-13 23:33:31,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:33:31,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-13 23:33:31,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-13 23:33:31,428] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2023-03-13 23:33:31,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +0: [2023-03-13 23:33:31,429] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2023-03-13 23:33:31,428] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2023-03-13 23:33:31,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:33:31,429] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2023-03-13 23:33:31,429] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2023-03-13 23:33:31,429] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:33:31,429] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. 
+3: [2023-03-13 23:33:31,429] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +5: [2023-03-13 23:33:31,429] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:33:31,429] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-13 23:33:31,429] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-13 23:33:31,429] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2023-03-13 23:33:31,429] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2023-03-13 23:33:31,430] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:33:31,430] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +5: [2023-03-13 23:33:31,430] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:33:31,430] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2023-03-13 23:33:31,429] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2023-03-13 23:33:31,430] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:33:31,430] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-13 23:33:31,430] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2023-03-13 23:33:31,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:33:31,428] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-13 23:33:31,428] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2023-03-13 23:33:31,431] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:33:31,431] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:33:31,431] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2023-03-13 23:33:31,431] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +0: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:33:31,431] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2023-03-13 23:33:31,431] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. 
+3: [2023-03-13 23:33:31,431] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-13 23:33:31,431] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2023-03-13 23:33:31,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:33:31,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-13 23:33:31,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2023-03-13 23:33:31,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:33:31,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-13 23:33:31,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2023-03-13 23:33:31,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:33:31,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:33:31,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:33:31,432] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:33:31,433] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3: [2023-03-13 23:33:31,433] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +7: [2023-03-13 23:33:31,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-13 23:33:31,432] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +2: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2023-03-13 23:33:31,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2023-03-13 23:33:31,432] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:33:31,433] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! 
+1: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:33:31,433] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2023-03-13 23:33:31,433] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +0: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2023-03-13 23:33:31,433] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:33:31,433] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! 
+4: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:33:31,434] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-13 23:33:31,434] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +7: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:33:31,433] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-13 23:33:31,433] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2023-03-13 23:33:31,434] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:33:31,434] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-13 23:33:31,434] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2023-03-13 23:33:31,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:33:31,435] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-13 23:33:31,435] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! 
+2: [2023-03-13 23:33:31,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:33:31,435] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2023-03-13 23:33:31,435] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2023-03-13 23:33:31,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:33:31,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:33:31,435] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-13 23:33:31,435] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-13 23:33:31,435] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +2: [2023-03-13 23:33:31,435] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2023-03-13 23:33:31,430] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-13 23:33:31,430] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2023-03-13 23:33:31,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:33:31,435] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-13 23:33:31,435] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2023-03-13 23:33:31,435] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:33:31,435] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-13 23:33:31,435] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2023-03-13 23:33:31,437] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:33:31,437] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:33:31,437] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-13 23:33:31,437] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2023-03-13 23:33:31,437] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-13 23:33:31,437] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2023-03-13 23:33:31,441] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. 
+1: [2023-03-13 23:33:31,441] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-13 23:33:31,441] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2023-03-13 23:33:31,441] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:33:31,441] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-13 23:33:31,441] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +3: [2023-03-13 23:33:31,442] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:33:31,442] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-13 23:33:31,443] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2023-03-13 23:33:31,449] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:33:31,449] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-13 23:33:31,449] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2023-03-13 23:33:31,456] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. 
+6: [2023-03-13 23:33:31,456] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-13 23:33:31,456] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2023-03-13 23:33:31,460] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:33:31,460] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-13 23:33:31,460] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2023-03-13 23:33:31,461] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:33:31,461] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:33:31,461] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-13 23:33:31,461] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2023-03-13 23:33:31,461] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-13 23:33:31,461] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2023-03-13 23:33:31,463] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:33:31,463] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-13 23:33:31,463] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +1: [2023-03-13 23:33:31,466] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:33:31,466] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-13 23:33:31,466] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +6: [2023-03-13 23:33:31,466] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:33:31,467] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-13 23:33:31,467] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2023-03-13 23:33:31,468] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:33:31,468] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-13 23:33:31,468] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +4: [2023-03-13 23:33:31,468] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:33:31,468] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-13 23:33:31,468] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +5: [2023-03-13 23:33:31,474] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:33:31,474] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-13 23:33:31,474] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2023-03-13 23:33:31,475] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:33:31,478] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-13 23:33:31,478] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! +0: [2023-03-13 23:33:31,481] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:33:31,481] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step3000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-13 23:33:31,481] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step3000 is ready now! 
+0: successfully saved checkpoint at iteration 3000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 426.98 +7: iteration 3010/ 21553 | consumed samples: 770560 | consumed tokens: 1578106880 | elapsed time per iteration (s): 0.35 | learning rate: 1.925E-04 | global batch size: 256 | lm loss: 3.763004E+00 | grad norm: 0.532 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 738.769 | TFLOPs: 25.86 | +7: iteration 3020/ 21553 | consumed samples: 773120 | consumed tokens: 1583349760 | elapsed time per iteration (s): 0.30 | learning rate: 1.924E-04 | global batch size: 256 | lm loss: 3.748506E+00 | grad norm: 0.492 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.563 | TFLOPs: 30.37 | +7: iteration 3030/ 21553 | consumed samples: 775680 | consumed tokens: 1588592640 | elapsed time per iteration (s): 0.30 | learning rate: 1.924E-04 | global batch size: 256 | lm loss: 3.747661E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.608 | TFLOPs: 30.37 | +7: iteration 3040/ 21553 | consumed samples: 778240 | consumed tokens: 1593835520 | elapsed time per iteration (s): 0.30 | learning rate: 1.923E-04 | global batch size: 256 | lm loss: 3.745968E+00 | grad norm: 0.507 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.105 | TFLOPs: 30.35 | +7: iteration 3050/ 21553 | consumed samples: 780800 | consumed tokens: 1599078400 | elapsed time per iteration (s): 0.30 | learning rate: 1.923E-04 | global batch size: 256 | lm loss: 3.738530E+00 | grad norm: 0.456 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.638 | TFLOPs: 30.09 | +7: iteration 3060/ 21553 | consumed samples: 783360 | consumed tokens: 1604321280 | elapsed time per iteration (s): 0.30 | learning rate: 1.922E-04 | 
global batch size: 256 | lm loss: 3.738536E+00 | grad norm: 0.491 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.881 | TFLOPs: 30.24 | +7: iteration 3070/ 21553 | consumed samples: 785920 | consumed tokens: 1609564160 | elapsed time per iteration (s): 0.30 | learning rate: 1.922E-04 | global batch size: 256 | lm loss: 3.748618E+00 | grad norm: 0.485 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.522 | TFLOPs: 30.19 | +7: iteration 3080/ 21553 | consumed samples: 788480 | consumed tokens: 1614807040 | elapsed time per iteration (s): 0.30 | learning rate: 1.921E-04 | global batch size: 256 | lm loss: 3.730001E+00 | grad norm: 0.545 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.788 | TFLOPs: 30.20 | +7: iteration 3090/ 21553 | consumed samples: 791040 | consumed tokens: 1620049920 | elapsed time per iteration (s): 0.30 | learning rate: 1.921E-04 | global batch size: 256 | lm loss: 3.734828E+00 | grad norm: 0.470 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.594 | TFLOPs: 30.23 | +7: iteration 3100/ 21553 | consumed samples: 793600 | consumed tokens: 1625292800 | elapsed time per iteration (s): 0.30 | learning rate: 1.920E-04 | global batch size: 256 | lm loss: 3.735106E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.659 | TFLOPs: 30.20 | +7: iteration 3110/ 21553 | consumed samples: 796160 | consumed tokens: 1630535680 | elapsed time per iteration (s): 0.30 | learning rate: 1.920E-04 | global batch size: 256 | lm loss: 3.737600E+00 | grad norm: 0.482 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.612 | TFLOPs: 30.13 | +7: iteration 3120/ 21553 | consumed samples: 798720 | consumed tokens: 
1635778560 | elapsed time per iteration (s): 0.30 | learning rate: 1.919E-04 | global batch size: 256 | lm loss: 3.735183E+00 | grad norm: 0.529 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.684 | TFLOPs: 30.24 | +7: iteration 3130/ 21553 | consumed samples: 801280 | consumed tokens: 1641021440 | elapsed time per iteration (s): 0.30 | learning rate: 1.918E-04 | global batch size: 256 | lm loss: 3.725793E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.690 | TFLOPs: 30.24 | +7: iteration 3140/ 21553 | consumed samples: 803840 | consumed tokens: 1646264320 | elapsed time per iteration (s): 0.30 | learning rate: 1.918E-04 | global batch size: 256 | lm loss: 3.718746E+00 | grad norm: 0.492 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.212 | TFLOPs: 30.22 | +7: iteration 3150/ 21553 | consumed samples: 806400 | consumed tokens: 1651507200 | elapsed time per iteration (s): 0.30 | learning rate: 1.917E-04 | global batch size: 256 | lm loss: 3.723774E+00 | grad norm: 0.519 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.259 | TFLOPs: 30.22 | +7: iteration 3160/ 21553 | consumed samples: 808960 | consumed tokens: 1656750080 | elapsed time per iteration (s): 0.30 | learning rate: 1.917E-04 | global batch size: 256 | lm loss: 3.722232E+00 | grad norm: 0.528 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.539 | TFLOPs: 30.23 | +7: iteration 3170/ 21553 | consumed samples: 811520 | consumed tokens: 1661992960 | elapsed time per iteration (s): 0.30 | learning rate: 1.916E-04 | global batch size: 256 | lm loss: 3.733519E+00 | grad norm: 0.497 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.199 | TFLOPs: 
30.22 | +7: iteration 3180/ 21553 | consumed samples: 814080 | consumed tokens: 1667235840 | elapsed time per iteration (s): 0.30 | learning rate: 1.916E-04 | global batch size: 256 | lm loss: 3.716302E+00 | grad norm: 0.495 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.355 | TFLOPs: 30.22 | +7: iteration 3190/ 21553 | consumed samples: 816640 | consumed tokens: 1672478720 | elapsed time per iteration (s): 0.30 | learning rate: 1.915E-04 | global batch size: 256 | lm loss: 3.731672E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.383 | TFLOPs: 30.22 | +7: iteration 3200/ 21553 | consumed samples: 819200 | consumed tokens: 1677721600 | elapsed time per iteration (s): 0.30 | learning rate: 1.915E-04 | global batch size: 256 | lm loss: 3.715638E+00 | grad norm: 0.491 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.947 | TFLOPs: 30.24 | +7: iteration 3210/ 21553 | consumed samples: 821760 | consumed tokens: 1682964480 | elapsed time per iteration (s): 0.30 | learning rate: 1.914E-04 | global batch size: 256 | lm loss: 3.709644E+00 | grad norm: 0.486 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.278 | TFLOPs: 30.26 | +7: iteration 3220/ 21553 | consumed samples: 824320 | consumed tokens: 1688207360 | elapsed time per iteration (s): 0.30 | learning rate: 1.913E-04 | global batch size: 256 | lm loss: 3.720410E+00 | grad norm: 0.479 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.330 | TFLOPs: 30.22 | +7: iteration 3230/ 21553 | consumed samples: 826880 | consumed tokens: 1693450240 | elapsed time per iteration (s): 0.30 | learning rate: 1.913E-04 | global batch size: 256 | lm loss: 3.702439E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 865.145 | TFLOPs: 30.29 | +7: iteration 3240/ 21553 | consumed samples: 829440 | consumed tokens: 1698693120 | elapsed time per iteration (s): 0.30 | learning rate: 1.912E-04 | global batch size: 256 | lm loss: 3.698589E+00 | grad norm: 0.535 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.577 | TFLOPs: 30.37 | +7: iteration 3250/ 21553 | consumed samples: 832000 | consumed tokens: 1703936000 | elapsed time per iteration (s): 0.30 | learning rate: 1.912E-04 | global batch size: 256 | lm loss: 3.707012E+00 | grad norm: 0.538 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.282 | TFLOPs: 30.36 | +7: iteration 3260/ 21553 | consumed samples: 834560 | consumed tokens: 1709178880 | elapsed time per iteration (s): 0.30 | learning rate: 1.911E-04 | global batch size: 256 | lm loss: 3.701438E+00 | grad norm: 0.510 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.782 | TFLOPs: 30.38 | +7: iteration 3270/ 21553 | consumed samples: 837120 | consumed tokens: 1714421760 | elapsed time per iteration (s): 0.30 | learning rate: 1.911E-04 | global batch size: 256 | lm loss: 3.716389E+00 | grad norm: 0.454 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.585 | TFLOPs: 30.37 | +7: iteration 3280/ 21553 | consumed samples: 839680 | consumed tokens: 1719664640 | elapsed time per iteration (s): 0.29 | learning rate: 1.910E-04 | global batch size: 256 | lm loss: 3.714522E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.908 | TFLOPs: 30.38 | +7: iteration 3290/ 21553 | consumed samples: 842240 | consumed tokens: 1724907520 | elapsed time per iteration (s): 0.29 | learning rate: 1.909E-04 | global batch size: 256 | 
lm loss: 3.695198E+00 | grad norm: 0.540 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.411 | TFLOPs: 30.40 | +7: iteration 3300/ 21553 | consumed samples: 844800 | consumed tokens: 1730150400 | elapsed time per iteration (s): 0.30 | learning rate: 1.909E-04 | global batch size: 256 | lm loss: 3.695579E+00 | grad norm: 0.473 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.542 | TFLOPs: 30.37 | +7: iteration 3310/ 21553 | consumed samples: 847360 | consumed tokens: 1735393280 | elapsed time per iteration (s): 0.29 | learning rate: 1.908E-04 | global batch size: 256 | lm loss: 3.699978E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.024 | TFLOPs: 30.39 | +7: iteration 3320/ 21553 | consumed samples: 849920 | consumed tokens: 1740636160 | elapsed time per iteration (s): 0.29 | learning rate: 1.908E-04 | global batch size: 256 | lm loss: 3.695239E+00 | grad norm: 0.495 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.571 | TFLOPs: 30.41 | +7: iteration 3330/ 21553 | consumed samples: 852480 | consumed tokens: 1745879040 | elapsed time per iteration (s): 0.29 | learning rate: 1.907E-04 | global batch size: 256 | lm loss: 3.695112E+00 | grad norm: 0.526 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.780 | TFLOPs: 30.41 | +7: iteration 3340/ 21553 | consumed samples: 855040 | consumed tokens: 1751121920 | elapsed time per iteration (s): 0.29 | learning rate: 1.906E-04 | global batch size: 256 | lm loss: 3.691437E+00 | grad norm: 0.463 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.268 | TFLOPs: 30.40 | +7: iteration 3350/ 21553 | consumed samples: 857600 | consumed tokens: 1756364800 | elapsed time 
per iteration (s): 0.30 | learning rate: 1.906E-04 | global batch size: 256 | lm loss: 3.687476E+00 | grad norm: 0.501 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.804 | TFLOPs: 29.71 | +7: iteration 3360/ 21553 | consumed samples: 860160 | consumed tokens: 1761607680 | elapsed time per iteration (s): 0.29 | learning rate: 1.905E-04 | global batch size: 256 | lm loss: 3.691874E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.561 | TFLOPs: 30.41 | +7: iteration 3370/ 21553 | consumed samples: 862720 | consumed tokens: 1766850560 | elapsed time per iteration (s): 0.29 | learning rate: 1.905E-04 | global batch size: 256 | lm loss: 3.701915E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.439 | TFLOPs: 30.40 | +7: iteration 3380/ 21553 | consumed samples: 865280 | consumed tokens: 1772093440 | elapsed time per iteration (s): 0.29 | learning rate: 1.904E-04 | global batch size: 256 | lm loss: 3.683498E+00 | grad norm: 0.478 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.032 | TFLOPs: 30.39 | +7: iteration 3390/ 21553 | consumed samples: 867840 | consumed tokens: 1777336320 | elapsed time per iteration (s): 0.29 | learning rate: 1.903E-04 | global batch size: 256 | lm loss: 3.694363E+00 | grad norm: 0.473 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.538 | TFLOPs: 30.41 | +7: iteration 3400/ 21553 | consumed samples: 870400 | consumed tokens: 1782579200 | elapsed time per iteration (s): 0.30 | learning rate: 1.903E-04 | global batch size: 256 | lm loss: 3.694681E+00 | grad norm: 0.513 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.495 | TFLOPs: 30.16 | +7: iteration 3410/ 
21553 | consumed samples: 872960 | consumed tokens: 1787822080 | elapsed time per iteration (s): 0.30 | learning rate: 1.902E-04 | global batch size: 256 | lm loss: 3.697764E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.358 | TFLOPs: 30.36 | +7: iteration 3420/ 21553 | consumed samples: 875520 | consumed tokens: 1793064960 | elapsed time per iteration (s): 0.30 | learning rate: 1.902E-04 | global batch size: 256 | lm loss: 3.678488E+00 | grad norm: 0.509 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.882 | TFLOPs: 30.35 | +7: iteration 3430/ 21553 | consumed samples: 878080 | consumed tokens: 1798307840 | elapsed time per iteration (s): 0.30 | learning rate: 1.901E-04 | global batch size: 256 | lm loss: 3.674966E+00 | grad norm: 0.459 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.981 | TFLOPs: 29.93 | +7: iteration 3440/ 21553 | consumed samples: 880640 | consumed tokens: 1803550720 | elapsed time per iteration (s): 0.29 | learning rate: 1.900E-04 | global batch size: 256 | lm loss: 3.674178E+00 | grad norm: 0.498 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 869.152 | TFLOPs: 30.43 | +7: iteration 3450/ 21553 | consumed samples: 883200 | consumed tokens: 1808793600 | elapsed time per iteration (s): 0.29 | learning rate: 1.900E-04 | global batch size: 256 | lm loss: 3.690893E+00 | grad norm: 0.498 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.324 | TFLOPs: 30.40 | +7: iteration 3460/ 21553 | consumed samples: 885760 | consumed tokens: 1814036480 | elapsed time per iteration (s): 0.29 | learning rate: 1.899E-04 | global batch size: 256 | lm loss: 3.677505E+00 | grad norm: 0.497 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 868.784 | TFLOPs: 30.41 | +7: iteration 3470/ 21553 | consumed samples: 888320 | consumed tokens: 1819279360 | elapsed time per iteration (s): 0.29 | learning rate: 1.899E-04 | global batch size: 256 | lm loss: 3.672397E+00 | grad norm: 0.491 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.530 | TFLOPs: 30.40 | +7: iteration 3480/ 21553 | consumed samples: 890880 | consumed tokens: 1824522240 | elapsed time per iteration (s): 0.29 | learning rate: 1.898E-04 | global batch size: 256 | lm loss: 3.679321E+00 | grad norm: 0.498 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.191 | TFLOPs: 30.39 | +7: iteration 3490/ 21553 | consumed samples: 893440 | consumed tokens: 1829765120 | elapsed time per iteration (s): 0.29 | learning rate: 1.897E-04 | global batch size: 256 | lm loss: 3.656414E+00 | grad norm: 0.493 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.397 | TFLOPs: 30.40 | +7: iteration 3500/ 21553 | consumed samples: 896000 | consumed tokens: 1835008000 | elapsed time per iteration (s): 0.30 | learning rate: 1.897E-04 | global batch size: 256 | lm loss: 3.677279E+00 | grad norm: 0.456 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.145 | TFLOPs: 30.36 | +7: iteration 3510/ 21553 | consumed samples: 898560 | consumed tokens: 1840250880 | elapsed time per iteration (s): 0.30 | learning rate: 1.896E-04 | global batch size: 256 | lm loss: 3.677775E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.627 | TFLOPs: 30.34 | +7: iteration 3520/ 21553 | consumed samples: 901120 | consumed tokens: 1845493760 | elapsed time per iteration (s): 0.30 | learning rate: 1.896E-04 | global batch size: 256 | lm loss: 3.662740E+00 | grad 
norm: 0.539 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.727 | TFLOPs: 30.34 | +7: iteration 3530/ 21553 | consumed samples: 903680 | consumed tokens: 1850736640 | elapsed time per iteration (s): 0.30 | learning rate: 1.895E-04 | global batch size: 256 | lm loss: 3.671918E+00 | grad norm: 0.496 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.009 | TFLOPs: 30.35 | +7: iteration 3540/ 21553 | consumed samples: 906240 | consumed tokens: 1855979520 | elapsed time per iteration (s): 0.30 | learning rate: 1.894E-04 | global batch size: 256 | lm loss: 3.679025E+00 | grad norm: 0.487 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.210 | TFLOPs: 30.25 | +7: iteration 3550/ 21553 | consumed samples: 908800 | consumed tokens: 1861222400 | elapsed time per iteration (s): 0.30 | learning rate: 1.894E-04 | global batch size: 256 | lm loss: 3.667964E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.999 | TFLOPs: 30.35 | +7: iteration 3560/ 21553 | consumed samples: 911360 | consumed tokens: 1866465280 | elapsed time per iteration (s): 0.30 | learning rate: 1.893E-04 | global batch size: 256 | lm loss: 3.668352E+00 | grad norm: 0.471 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.366 | TFLOPs: 30.33 | +7: iteration 3570/ 21553 | consumed samples: 913920 | consumed tokens: 1871708160 | elapsed time per iteration (s): 0.30 | learning rate: 1.892E-04 | global batch size: 256 | lm loss: 3.649645E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.584 | TFLOPs: 30.34 | +7: iteration 3580/ 21553 | consumed samples: 916480 | consumed tokens: 1876951040 | elapsed time per iteration (s): 0.30 | 
learning rate: 1.892E-04 | global batch size: 256 | lm loss: 3.655698E+00 | grad norm: 0.530 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.360 | TFLOPs: 30.33 | +7: iteration 3590/ 21553 | consumed samples: 919040 | consumed tokens: 1882193920 | elapsed time per iteration (s): 0.30 | learning rate: 1.891E-04 | global batch size: 256 | lm loss: 3.655117E+00 | grad norm: 0.470 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.129 | TFLOPs: 30.32 | +7: iteration 3600/ 21553 | consumed samples: 921600 | consumed tokens: 1887436800 | elapsed time per iteration (s): 0.30 | learning rate: 1.891E-04 | global batch size: 256 | lm loss: 3.649724E+00 | grad norm: 0.489 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.902 | TFLOPs: 30.31 | +7: iteration 3610/ 21553 | consumed samples: 924160 | consumed tokens: 1892679680 | elapsed time per iteration (s): 0.30 | learning rate: 1.890E-04 | global batch size: 256 | lm loss: 3.662640E+00 | grad norm: 0.468 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.805 | TFLOPs: 30.31 | +7: iteration 3620/ 21553 | consumed samples: 926720 | consumed tokens: 1897922560 | elapsed time per iteration (s): 0.30 | learning rate: 1.889E-04 | global batch size: 256 | lm loss: 3.639087E+00 | grad norm: 0.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.583 | TFLOPs: 30.34 | +7: iteration 3630/ 21553 | consumed samples: 929280 | consumed tokens: 1903165440 | elapsed time per iteration (s): 0.30 | learning rate: 1.889E-04 | global batch size: 256 | lm loss: 3.635983E+00 | grad norm: 0.475 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.549 | TFLOPs: 29.67 | +7: iteration 3640/ 21553 | consumed samples: 
931840 | consumed tokens: 1908408320 | elapsed time per iteration (s): 0.30 | learning rate: 1.888E-04 | global batch size: 256 | lm loss: 3.643287E+00 | grad norm: 0.555 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.927 | TFLOPs: 30.35 | +7: iteration 3650/ 21553 | consumed samples: 934400 | consumed tokens: 1913651200 | elapsed time per iteration (s): 0.30 | learning rate: 1.887E-04 | global batch size: 256 | lm loss: 3.649979E+00 | grad norm: 0.514 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.569 | TFLOPs: 30.34 | +7: iteration 3660/ 21553 | consumed samples: 936960 | consumed tokens: 1918894080 | elapsed time per iteration (s): 0.30 | learning rate: 1.887E-04 | global batch size: 256 | lm loss: 3.651036E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.998 | TFLOPs: 30.32 | +7: iteration 3670/ 21553 | consumed samples: 939520 | consumed tokens: 1924136960 | elapsed time per iteration (s): 0.30 | learning rate: 1.886E-04 | global batch size: 256 | lm loss: 3.652398E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.730 | TFLOPs: 30.34 | +7: iteration 3680/ 21553 | consumed samples: 942080 | consumed tokens: 1929379840 | elapsed time per iteration (s): 0.30 | learning rate: 1.885E-04 | global batch size: 256 | lm loss: 3.651306E+00 | grad norm: 0.399 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.947 | TFLOPs: 30.28 | +7: iteration 3690/ 21553 | consumed samples: 944640 | consumed tokens: 1934622720 | elapsed time per iteration (s): 0.30 | learning rate: 1.885E-04 | global batch size: 256 | lm loss: 3.639976E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per 
second: 862.832 | TFLOPs: 30.21 | +7: iteration 3700/ 21553 | consumed samples: 947200 | consumed tokens: 1939865600 | elapsed time per iteration (s): 0.30 | learning rate: 1.884E-04 | global batch size: 256 | lm loss: 3.650327E+00 | grad norm: 0.526 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.746 | TFLOPs: 30.34 | +7: iteration 3710/ 21553 | consumed samples: 949760 | consumed tokens: 1945108480 | elapsed time per iteration (s): 0.30 | learning rate: 1.883E-04 | global batch size: 256 | lm loss: 3.625675E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.940 | TFLOPs: 30.31 | +7: iteration 3720/ 21553 | consumed samples: 952320 | consumed tokens: 1950351360 | elapsed time per iteration (s): 0.30 | learning rate: 1.883E-04 | global batch size: 256 | lm loss: 3.645001E+00 | grad norm: 0.478 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.845 | TFLOPs: 30.28 | +7: iteration 3730/ 21553 | consumed samples: 954880 | consumed tokens: 1955594240 | elapsed time per iteration (s): 0.30 | learning rate: 1.882E-04 | global batch size: 256 | lm loss: 3.647770E+00 | grad norm: 0.478 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.123 | TFLOPs: 30.36 | +7: iteration 3740/ 21553 | consumed samples: 957440 | consumed tokens: 1960837120 | elapsed time per iteration (s): 0.30 | learning rate: 1.882E-04 | global batch size: 256 | lm loss: 3.640016E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.547 | TFLOPs: 30.27 | +7: iteration 3750/ 21553 | consumed samples: 960000 | consumed tokens: 1966080000 | elapsed time per iteration (s): 0.30 | learning rate: 1.881E-04 | global batch size: 256 | lm loss: 3.627223E+00 | grad norm: 0.443 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.615 | TFLOPs: 30.30 | +7: iteration 3760/ 21553 | consumed samples: 962560 | consumed tokens: 1971322880 | elapsed time per iteration (s): 0.30 | learning rate: 1.880E-04 | global batch size: 256 | lm loss: 3.638915E+00 | grad norm: 0.479 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.557 | TFLOPs: 29.88 | +7: iteration 3770/ 21553 | consumed samples: 965120 | consumed tokens: 1976565760 | elapsed time per iteration (s): 0.30 | learning rate: 1.880E-04 | global batch size: 256 | lm loss: 3.622755E+00 | grad norm: 0.487 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.121 | TFLOPs: 30.25 | +7: iteration 3780/ 21553 | consumed samples: 967680 | consumed tokens: 1981808640 | elapsed time per iteration (s): 0.30 | learning rate: 1.879E-04 | global batch size: 256 | lm loss: 3.635292E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.523 | TFLOPs: 30.26 | +7: iteration 3790/ 21553 | consumed samples: 970240 | consumed tokens: 1987051520 | elapsed time per iteration (s): 0.30 | learning rate: 1.878E-04 | global batch size: 256 | lm loss: 3.641746E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.737 | TFLOPs: 30.31 | +7: iteration 3800/ 21553 | consumed samples: 972800 | consumed tokens: 1992294400 | elapsed time per iteration (s): 0.30 | learning rate: 1.878E-04 | global batch size: 256 | lm loss: 3.632482E+00 | grad norm: 0.507 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.898 | TFLOPs: 30.31 | +7: iteration 3810/ 21553 | consumed samples: 975360 | consumed tokens: 1997537280 | elapsed time per iteration (s): 0.30 | learning rate: 1.877E-04 | global 
batch size: 256 | lm loss: 3.624069E+00 | grad norm: 0.403 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.071 | TFLOPs: 30.32 | +7: iteration 3820/ 21553 | consumed samples: 977920 | consumed tokens: 2002780160 | elapsed time per iteration (s): 0.30 | learning rate: 1.876E-04 | global batch size: 256 | lm loss: 3.632056E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.089 | TFLOPs: 30.28 | +7: iteration 3830/ 21553 | consumed samples: 980480 | consumed tokens: 2008023040 | elapsed time per iteration (s): 0.30 | learning rate: 1.876E-04 | global batch size: 256 | lm loss: 3.631409E+00 | grad norm: 0.484 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.022 | TFLOPs: 30.28 | +7: iteration 3840/ 21553 | consumed samples: 983040 | consumed tokens: 2013265920 | elapsed time per iteration (s): 0.30 | learning rate: 1.875E-04 | global batch size: 256 | lm loss: 3.640799E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.718 | TFLOPs: 30.31 | +7: iteration 3850/ 21553 | consumed samples: 985600 | consumed tokens: 2018508800 | elapsed time per iteration (s): 0.30 | learning rate: 1.874E-04 | global batch size: 256 | lm loss: 3.614359E+00 | grad norm: 0.563 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.050 | TFLOPs: 30.32 | +7: iteration 3860/ 21553 | consumed samples: 988160 | consumed tokens: 2023751680 | elapsed time per iteration (s): 0.30 | learning rate: 1.874E-04 | global batch size: 256 | lm loss: 3.626331E+00 | grad norm: 0.470 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.033 | TFLOPs: 30.35 | +7: iteration 3870/ 21553 | consumed samples: 990720 | consumed tokens: 
2028994560 | elapsed time per iteration (s): 0.30 | learning rate: 1.873E-04 | global batch size: 256 | lm loss: 3.618562E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.057 | TFLOPs: 30.35 | +7: iteration 3880/ 21553 | consumed samples: 993280 | consumed tokens: 2034237440 | elapsed time per iteration (s): 0.30 | learning rate: 1.872E-04 | global batch size: 256 | lm loss: 3.617046E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.697 | TFLOPs: 30.34 | +7: iteration 3890/ 21553 | consumed samples: 995840 | consumed tokens: 2039480320 | elapsed time per iteration (s): 0.30 | learning rate: 1.871E-04 | global batch size: 256 | lm loss: 3.610705E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.571 | TFLOPs: 30.34 | +7: iteration 3900/ 21553 | consumed samples: 998400 | consumed tokens: 2044723200 | elapsed time per iteration (s): 0.30 | learning rate: 1.871E-04 | global batch size: 256 | lm loss: 3.619778E+00 | grad norm: 0.506 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.922 | TFLOPs: 30.35 | +7: iteration 3910/ 21553 | consumed samples: 1000960 | consumed tokens: 2049966080 | elapsed time per iteration (s): 0.30 | learning rate: 1.870E-04 | global batch size: 256 | lm loss: 3.621497E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.919 | TFLOPs: 30.35 | +7: iteration 3920/ 21553 | consumed samples: 1003520 | consumed tokens: 2055208960 | elapsed time per iteration (s): 0.30 | learning rate: 1.869E-04 | global batch size: 256 | lm loss: 3.613568E+00 | grad norm: 0.470 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.584 | TFLOPs: 
30.06 | +7: iteration 3930/ 21553 | consumed samples: 1006080 | consumed tokens: 2060451840 | elapsed time per iteration (s): 0.30 | learning rate: 1.869E-04 | global batch size: 256 | lm loss: 3.615202E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.862 | TFLOPs: 29.93 | +7: iteration 3940/ 21553 | consumed samples: 1008640 | consumed tokens: 2065694720 | elapsed time per iteration (s): 0.30 | learning rate: 1.868E-04 | global batch size: 256 | lm loss: 3.595695E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.717 | TFLOPs: 30.34 | +7: iteration 3950/ 21553 | consumed samples: 1011200 | consumed tokens: 2070937600 | elapsed time per iteration (s): 0.30 | learning rate: 1.867E-04 | global batch size: 256 | lm loss: 3.613329E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.308 | TFLOPs: 30.33 | +7: iteration 3960/ 21553 | consumed samples: 1013760 | consumed tokens: 2076180480 | elapsed time per iteration (s): 0.30 | learning rate: 1.867E-04 | global batch size: 256 | lm loss: 3.616171E+00 | grad norm: 0.479 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.417 | TFLOPs: 30.33 | +7: iteration 3970/ 21553 | consumed samples: 1016320 | consumed tokens: 2081423360 | elapsed time per iteration (s): 0.30 | learning rate: 1.866E-04 | global batch size: 256 | lm loss: 3.607252E+00 | grad norm: 0.477 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.260 | TFLOPs: 30.33 | +7: iteration 3980/ 21553 | consumed samples: 1018880 | consumed tokens: 2086666240 | elapsed time per iteration (s): 0.30 | learning rate: 1.865E-04 | global batch size: 256 | lm loss: 3.600078E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 866.912 | TFLOPs: 30.35 | +7: iteration 3990/ 21553 | consumed samples: 1021440 | consumed tokens: 2091909120 | elapsed time per iteration (s): 0.30 | learning rate: 1.865E-04 | global batch size: 256 | lm loss: 3.601826E+00 | grad norm: 0.473 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.114 | TFLOPs: 30.32 | +0: [2023-03-13 23:38:27,356] [INFO] [logging.py:68:log_dist] [Rank 0] step=4000, skipped=0, lr=[0.00018638652011758862, 0.00018638652011758862, 0.00018638652011758862], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 4000/ 21553 | consumed samples: 1024000 | consumed tokens: 2097152000 | elapsed time per iteration (s): 0.30 | learning rate: 1.864E-04 | global batch size: 256 | lm loss: 3.605963E+00 | grad norm: 0.462 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.422 | TFLOPs: 30.33 | +0: steps: 4000 loss: 3.5854 iter time (s): 0.294 samples/sec: 872.071 +7: ----------------------------------------------------------------------------------------------- +7: validation loss at iteration 4000 | lm loss value: 3.814986E+00 | lm loss PPL: 4.537611E+01 | +7: ----------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 4000 to checkpoints_146m14b100m +0: [2023-03-13 23:38:27,476] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step4000 is begin to save! +0: [2023-03-13 23:38:27,479] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_01-model_00-model_states.pt... +0: [2023-03-13 23:38:27,563] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_01-model_00-model_states.pt. 
+0: [2023-03-13 23:38:27,564] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_03-model_00-model_states.pt... +0: [2023-03-13 23:38:27,580] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_03-model_00-model_states.pt. +0: [2023-03-13 23:38:27,580] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_04-model_00-model_states.pt... +0: [2023-03-13 23:38:27,595] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_04-model_00-model_states.pt. +0: [2023-03-13 23:38:27,596] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_05-model_00-model_states.pt... +0: [2023-03-13 23:38:27,611] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_05-model_00-model_states.pt. +0: [2023-03-13 23:38:27,611] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_06-model_00-model_states.pt... +0: [2023-03-13 23:38:27,626] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_06-model_00-model_states.pt. +0: [2023-03-13 23:38:27,627] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_07-model_00-model_states.pt... +0: [2023-03-13 23:38:27,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_07-model_00-model_states.pt. +0: [2023-03-13 23:38:27,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_08-model_00-model_states.pt... +0: [2023-03-13 23:38:27,657] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_08-model_00-model_states.pt. 
+0: [2023-03-13 23:38:27,657] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_09-model_00-model_states.pt... +0: [2023-03-13 23:38:27,673] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_09-model_00-model_states.pt. +0: [2023-03-13 23:38:27,673] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_10-model_00-model_states.pt... +0: [2023-03-13 23:38:27,688] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_10-model_00-model_states.pt. +0: [2023-03-13 23:38:27,688] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_11-model_00-model_states.pt... +0: [2023-03-13 23:38:27,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_11-model_00-model_states.pt. +0: [2023-03-13 23:38:27,704] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_12-model_00-model_states.pt... +0: [2023-03-13 23:38:27,719] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_12-model_00-model_states.pt. +0: [2023-03-13 23:38:27,719] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_13-model_00-model_states.pt... +0: [2023-03-13 23:38:27,734] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_13-model_00-model_states.pt. +0: [2023-03-13 23:38:27,735] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_14-model_00-model_states.pt... +0: [2023-03-13 23:38:27,750] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_14-model_00-model_states.pt. 
+0: [2023-03-13 23:38:27,750] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_15-model_00-model_states.pt... +0: [2023-03-13 23:38:27,765] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_15-model_00-model_states.pt. +0: [2023-03-13 23:38:27,766] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_16-model_00-model_states.pt... +0: [2023-03-13 23:38:27,781] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_16-model_00-model_states.pt. +0: [2023-03-13 23:38:27,781] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_17-model_00-model_states.pt... +0: [2023-03-13 23:38:27,796] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_17-model_00-model_states.pt. +0: [2023-03-13 23:38:27,796] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/layer_19-model_00-model_states.pt... +0: [2023-03-13 23:38:27,797] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/layer_19-model_00-model_states.pt. +0: [2023-03-13 23:38:27,798] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step4000/mp_rank_00_model_states.pt +0: [2023-03-13 23:38:27,798] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/mp_rank_00_model_states.pt... +0: [2023-03-13 23:38:27,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/mp_rank_00_model_states.pt. +0: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... 
+0: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... 
+4: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... 
+6: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... 
+3: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... 
+2: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... 
+5: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:38:27,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:38:27,847] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:38:27,847] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-13 23:38:27,847] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! 
+1: [2023-03-13 23:38:27,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:38:27,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-13 23:38:27,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2023-03-13 23:38:27,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:38:27,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-13 23:38:27,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2023-03-13 23:38:27,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:38:27,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2023-03-13 23:38:27,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2023-03-13 23:38:27,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:38:27,850] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2023-03-13 23:38:27,850] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! 
+2: [2023-03-13 23:38:27,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:38:27,852] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:38:27,852] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:38:27,851] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-13 23:38:27,851] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2023-03-13 23:38:27,852] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2023-03-13 23:38:27,852] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-13 23:38:27,852] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2023-03-13 23:38:27,852] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2023-03-13 23:38:27,852] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:38:27,852] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +3: [2023-03-13 23:38:27,852] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:38:27,853] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +0: [2023-03-13 23:38:27,853] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:38:27,853] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2023-03-13 23:38:27,853] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-13 23:38:27,853] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2023-03-13 23:38:27,853] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:38:27,853] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:38:27,854] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. 
+0: [2023-03-13 23:38:27,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-13 23:38:27,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2023-03-13 23:38:27,852] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2023-03-13 23:38:27,853] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:38:27,853] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-13 23:38:27,853] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2023-03-13 23:38:27,854] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:38:27,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-13 23:38:27,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2023-03-13 23:38:27,855] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:38:27,855] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-13 23:38:27,855] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! 
+6: [2023-03-13 23:38:27,857] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:38:27,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-13 23:38:27,857] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:38:27,857] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2023-03-13 23:38:27,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-13 23:38:27,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. 
+6: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:38:27,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-13 23:38:27,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-13 23:38:27,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +6: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:38:27,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2023-03-13 23:38:27,859] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:38:27,859] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:38:27,859] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-13 23:38:27,859] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-13 23:38:27,859] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2023-03-13 23:38:27,859] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2023-03-13 23:38:27,860] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:38:27,860] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:38:27,860] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-13 23:38:27,860] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2023-03-13 23:38:27,860] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +5: [2023-03-13 23:38:27,860] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:38:27,860] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:38:27,860] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +7: [2023-03-13 23:38:27,860] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2023-03-13 23:38:27,860] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-13 23:38:27,860] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +5: [2023-03-13 23:38:27,860] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:38:27,865] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:38:27,865] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-13 23:38:27,865] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +0: [2023-03-13 23:38:27,865] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +7: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2023-03-13 23:38:27,865] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. 
+1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:38:27,865] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-13 23:38:27,865] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-13 23:38:27,865] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-13 23:38:27,865] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-13 23:38:27,865] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-13 23:38:27,865] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-13 23:38:27,865] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! 
+1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +1: [2023-03-13 23:38:27,865] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2023-03-13 23:38:27,854] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:38:27,853] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +3: [2023-03-13 23:38:27,855] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:38:27,853] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:38:27,853] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! 
+3: [2023-03-13 23:38:27,855] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +4: [2023-03-13 23:38:27,853] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3: [2023-03-13 23:38:27,855] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2023-03-13 23:38:27,853] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2023-03-13 23:38:27,853] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:38:27,853] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-13 23:38:27,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:38:27,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2023-03-13 23:38:27,854] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +4: [2023-03-13 23:38:27,859] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. 
+2: [2023-03-13 23:38:27,854] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2023-03-13 23:38:27,859] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +2: [2023-03-13 23:38:27,857] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:38:27,859] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2023-03-13 23:38:27,857] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +4: [2023-03-13 23:38:27,866] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:38:27,866] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:38:27,857] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2023-03-13 23:38:27,866] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:38:27,866] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-13 23:38:27,866] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +2: [2023-03-13 23:38:27,858] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +4: [2023-03-13 23:38:27,866] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +2: [2023-03-13 23:38:27,858] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2023-03-13 23:38:27,866] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +4: [2023-03-13 23:38:27,866] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2023-03-13 23:38:27,859] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:38:27,866] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2023-03-13 23:38:27,859] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-13 23:38:27,859] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +2: [2023-03-13 23:38:27,859] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. 
+2: [2023-03-13 23:38:27,859] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-13 23:38:27,859] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2023-03-13 23:38:27,871] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:38:27,871] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-13 23:38:27,871] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2023-03-13 23:38:27,871] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:38:27,871] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-13 23:38:27,871] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +0: [2023-03-13 23:38:27,871] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:38:27,871] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-13 23:38:27,871] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! 
+0: [2023-03-13 23:38:27,881] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-13 23:38:27,882] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2023-03-13 23:38:27,883] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:38:27,883] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-13 23:38:27,883] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2023-03-13 23:38:27,888] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:38:27,888] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-13 23:38:27,888] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2023-03-13 23:38:27,891] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:38:27,891] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-13 23:38:27,891] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2023-03-13 23:38:27,892] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. 
+3: [2023-03-13 23:38:27,892] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-13 23:38:27,892] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +7: [2023-03-13 23:38:27,895] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:38:27,896] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-13 23:38:27,896] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +3: [2023-03-13 23:38:27,896] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:38:27,896] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step4000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2023-03-13 23:38:27,896] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! 
+0: successfully saved checkpoint at iteration 4000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 424.47 +7: iteration 4010/ 21553 | consumed samples: 1026560 | consumed tokens: 2102394880 | elapsed time per iteration (s): 0.35 | learning rate: 1.863E-04 | global batch size: 256 | lm loss: 3.593925E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 734.584 | TFLOPs: 25.72 | +7: iteration 4020/ 21553 | consumed samples: 1029120 | consumed tokens: 2107637760 | elapsed time per iteration (s): 0.30 | learning rate: 1.862E-04 | global batch size: 256 | lm loss: 3.598876E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.321 | TFLOPs: 30.33 | +7: iteration 4030/ 21553 | consumed samples: 1031680 | consumed tokens: 2112880640 | elapsed time per iteration (s): 0.30 | learning rate: 1.862E-04 | global batch size: 256 | lm loss: 3.605729E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.663 | TFLOPs: 30.34 | +7: iteration 4040/ 21553 | consumed samples: 1034240 | consumed tokens: 2118123520 | elapsed time per iteration (s): 0.30 | learning rate: 1.861E-04 | global batch size: 256 | lm loss: 3.599716E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.445 | TFLOPs: 30.33 | +7: iteration 4050/ 21553 | consumed samples: 1036800 | consumed tokens: 2123366400 | elapsed time per iteration (s): 0.30 | learning rate: 1.860E-04 | global batch size: 256 | lm loss: 3.602467E+00 | grad norm: 0.496 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.851 | TFLOPs: 30.35 | +7: iteration 4060/ 21553 | consumed samples: 1039360 | consumed tokens: 2128609280 | elapsed time per iteration (s): 0.30 | learning rate: 
1.860E-04 | global batch size: 256 | lm loss: 3.589561E+00 | grad norm: 0.498 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.782 | TFLOPs: 30.34 | +7: iteration 4070/ 21553 | consumed samples: 1041920 | consumed tokens: 2133852160 | elapsed time per iteration (s): 0.30 | learning rate: 1.859E-04 | global batch size: 256 | lm loss: 3.599025E+00 | grad norm: 0.526 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.303 | TFLOPs: 30.33 | +7: iteration 4080/ 21553 | consumed samples: 1044480 | consumed tokens: 2139095040 | elapsed time per iteration (s): 0.30 | learning rate: 1.858E-04 | global batch size: 256 | lm loss: 3.603081E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.877 | TFLOPs: 30.35 | +7: iteration 4090/ 21553 | consumed samples: 1047040 | consumed tokens: 2144337920 | elapsed time per iteration (s): 0.30 | learning rate: 1.857E-04 | global batch size: 256 | lm loss: 3.582727E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.535 | TFLOPs: 30.33 | +7: iteration 4100/ 21553 | consumed samples: 1049600 | consumed tokens: 2149580800 | elapsed time per iteration (s): 0.30 | learning rate: 1.857E-04 | global batch size: 256 | lm loss: 3.590814E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.974 | TFLOPs: 30.32 | +7: iteration 4110/ 21553 | consumed samples: 1052160 | consumed tokens: 2154823680 | elapsed time per iteration (s): 0.30 | learning rate: 1.856E-04 | global batch size: 256 | lm loss: 3.583480E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.731 | TFLOPs: 30.20 | +7: iteration 4120/ 21553 | consumed samples: 1054720 | 
consumed tokens: 2160066560 | elapsed time per iteration (s): 0.30 | learning rate: 1.855E-04 | global batch size: 256 | lm loss: 3.599645E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.486 | TFLOPs: 30.33 | +7: iteration 4130/ 21553 | consumed samples: 1057280 | consumed tokens: 2165309440 | elapsed time per iteration (s): 0.30 | learning rate: 1.855E-04 | global batch size: 256 | lm loss: 3.594786E+00 | grad norm: 0.499 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.489 | TFLOPs: 30.33 | +7: iteration 4140/ 21553 | consumed samples: 1059840 | consumed tokens: 2170552320 | elapsed time per iteration (s): 0.30 | learning rate: 1.854E-04 | global batch size: 256 | lm loss: 3.585149E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.841 | TFLOPs: 30.31 | +7: iteration 4150/ 21553 | consumed samples: 1062400 | consumed tokens: 2175795200 | elapsed time per iteration (s): 0.30 | learning rate: 1.853E-04 | global batch size: 256 | lm loss: 3.586044E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.954 | TFLOPs: 30.21 | +7: iteration 4160/ 21553 | consumed samples: 1064960 | consumed tokens: 2181038080 | elapsed time per iteration (s): 0.30 | learning rate: 1.852E-04 | global batch size: 256 | lm loss: 3.592381E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.135 | TFLOPs: 30.22 | +7: iteration 4170/ 21553 | consumed samples: 1067520 | consumed tokens: 2186280960 | elapsed time per iteration (s): 0.30 | learning rate: 1.852E-04 | global batch size: 256 | lm loss: 3.581277E+00 | grad norm: 0.519 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 
862.978 | TFLOPs: 30.21 | +7: iteration 4180/ 21553 | consumed samples: 1070080 | consumed tokens: 2191523840 | elapsed time per iteration (s): 0.30 | learning rate: 1.851E-04 | global batch size: 256 | lm loss: 3.578955E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.586 | TFLOPs: 30.20 | +7: iteration 4190/ 21553 | consumed samples: 1072640 | consumed tokens: 2196766720 | elapsed time per iteration (s): 0.30 | learning rate: 1.850E-04 | global batch size: 256 | lm loss: 3.590302E+00 | grad norm: 0.583 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.133 | TFLOPs: 30.22 | +7: iteration 4200/ 21553 | consumed samples: 1075200 | consumed tokens: 2202009600 | elapsed time per iteration (s): 0.30 | learning rate: 1.850E-04 | global batch size: 256 | lm loss: 3.598820E+00 | grad norm: 0.489 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.578 | TFLOPs: 30.23 | +7: iteration 4210/ 21553 | consumed samples: 1077760 | consumed tokens: 2207252480 | elapsed time per iteration (s): 0.30 | learning rate: 1.849E-04 | global batch size: 256 | lm loss: 3.575968E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.280 | TFLOPs: 30.36 | +7: iteration 4220/ 21553 | consumed samples: 1080320 | consumed tokens: 2212495360 | elapsed time per iteration (s): 0.30 | learning rate: 1.848E-04 | global batch size: 256 | lm loss: 3.581316E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.646 | TFLOPs: 30.34 | +7: iteration 4230/ 21553 | consumed samples: 1082880 | consumed tokens: 2217738240 | elapsed time per iteration (s): 0.30 | learning rate: 1.847E-04 | global batch size: 256 | lm loss: 3.587523E+00 | grad norm: 0.443 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.861 | TFLOPs: 30.35 | +7: iteration 4240/ 21553 | consumed samples: 1085440 | consumed tokens: 2222981120 | elapsed time per iteration (s): 0.30 | learning rate: 1.847E-04 | global batch size: 256 | lm loss: 3.580676E+00 | grad norm: 0.475 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.276 | TFLOPs: 30.36 | +7: iteration 4250/ 21553 | consumed samples: 1088000 | consumed tokens: 2228224000 | elapsed time per iteration (s): 0.30 | learning rate: 1.846E-04 | global batch size: 256 | lm loss: 3.576801E+00 | grad norm: 0.462 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.469 | TFLOPs: 30.37 | +7: iteration 4260/ 21553 | consumed samples: 1090560 | consumed tokens: 2233466880 | elapsed time per iteration (s): 0.29 | learning rate: 1.845E-04 | global batch size: 256 | lm loss: 3.582041E+00 | grad norm: 0.471 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.122 | TFLOPs: 30.39 | +7: iteration 4270/ 21553 | consumed samples: 1093120 | consumed tokens: 2238709760 | elapsed time per iteration (s): 0.30 | learning rate: 1.844E-04 | global batch size: 256 | lm loss: 3.572811E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.073 | TFLOPs: 30.35 | +7: iteration 4280/ 21553 | consumed samples: 1095680 | consumed tokens: 2243952640 | elapsed time per iteration (s): 0.30 | learning rate: 1.844E-04 | global batch size: 256 | lm loss: 3.580938E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.106 | TFLOPs: 30.35 | +7: iteration 4290/ 21553 | consumed samples: 1098240 | consumed tokens: 2249195520 | elapsed time per iteration (s): 0.30 | learning rate: 1.843E-04 | 
global batch size: 256 | lm loss: 3.572829E+00 | grad norm: 0.486 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.316 | TFLOPs: 30.36 | +7: iteration 4300/ 21553 | consumed samples: 1100800 | consumed tokens: 2254438400 | elapsed time per iteration (s): 0.30 | learning rate: 1.842E-04 | global batch size: 256 | lm loss: 3.575382E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.172 | TFLOPs: 30.36 | +7: iteration 4310/ 21553 | consumed samples: 1103360 | consumed tokens: 2259681280 | elapsed time per iteration (s): 0.29 | learning rate: 1.841E-04 | global batch size: 256 | lm loss: 3.574084E+00 | grad norm: 0.598 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.817 | TFLOPs: 30.38 | +7: iteration 4320/ 21553 | consumed samples: 1105920 | consumed tokens: 2264924160 | elapsed time per iteration (s): 0.29 | learning rate: 1.841E-04 | global batch size: 256 | lm loss: 3.563501E+00 | grad norm: 0.474 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.766 | TFLOPs: 30.41 | +7: iteration 4330/ 21553 | consumed samples: 1108480 | consumed tokens: 2270167040 | elapsed time per iteration (s): 0.29 | learning rate: 1.840E-04 | global batch size: 256 | lm loss: 3.563913E+00 | grad norm: 0.465 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.322 | TFLOPs: 30.40 | +7: iteration 4340/ 21553 | consumed samples: 1111040 | consumed tokens: 2275409920 | elapsed time per iteration (s): 0.29 | learning rate: 1.839E-04 | global batch size: 256 | lm loss: 3.571528E+00 | grad norm: 0.475 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.919 | TFLOPs: 30.38 | +7: iteration 4350/ 21553 | consumed samples: 1113600 | consumed 
tokens: 2280652800 | elapsed time per iteration (s): 0.29 | learning rate: 1.838E-04 | global batch size: 256 | lm loss: 3.574436E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.936 | TFLOPs: 30.38 | +7: iteration 4360/ 21553 | consumed samples: 1116160 | consumed tokens: 2285895680 | elapsed time per iteration (s): 0.30 | learning rate: 1.838E-04 | global batch size: 256 | lm loss: 3.566277E+00 | grad norm: 0.478 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.234 | TFLOPs: 29.87 | +7: iteration 4370/ 21553 | consumed samples: 1118720 | consumed tokens: 2291138560 | elapsed time per iteration (s): 0.30 | learning rate: 1.837E-04 | global batch size: 256 | lm loss: 3.570149E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.218 | TFLOPs: 30.36 | +7: iteration 4380/ 21553 | consumed samples: 1121280 | consumed tokens: 2296381440 | elapsed time per iteration (s): 0.29 | learning rate: 1.836E-04 | global batch size: 256 | lm loss: 3.552244E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.022 | TFLOPs: 30.39 | +7: iteration 4390/ 21553 | consumed samples: 1123840 | consumed tokens: 2301624320 | elapsed time per iteration (s): 0.30 | learning rate: 1.835E-04 | global batch size: 256 | lm loss: 3.563239E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.465 | TFLOPs: 30.37 | +7: iteration 4400/ 21553 | consumed samples: 1126400 | consumed tokens: 2306867200 | elapsed time per iteration (s): 0.30 | learning rate: 1.835E-04 | global batch size: 256 | lm loss: 3.559962E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.425 
| TFLOPs: 30.37 | +7: iteration 4410/ 21553 | consumed samples: 1128960 | consumed tokens: 2312110080 | elapsed time per iteration (s): 0.30 | learning rate: 1.834E-04 | global batch size: 256 | lm loss: 3.559438E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.995 | TFLOPs: 30.35 | +7: iteration 4420/ 21553 | consumed samples: 1131520 | consumed tokens: 2317352960 | elapsed time per iteration (s): 0.30 | learning rate: 1.833E-04 | global batch size: 256 | lm loss: 3.553418E+00 | grad norm: 0.495 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.190 | TFLOPs: 30.36 | +7: iteration 4430/ 21553 | consumed samples: 1134080 | consumed tokens: 2322595840 | elapsed time per iteration (s): 0.30 | learning rate: 1.832E-04 | global batch size: 256 | lm loss: 3.559764E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.141 | TFLOPs: 30.36 | +7: iteration 4440/ 21553 | consumed samples: 1136640 | consumed tokens: 2327838720 | elapsed time per iteration (s): 0.30 | learning rate: 1.831E-04 | global batch size: 256 | lm loss: 3.553875E+00 | grad norm: 0.489 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.115 | TFLOPs: 30.36 | +7: iteration 4450/ 21553 | consumed samples: 1139200 | consumed tokens: 2333081600 | elapsed time per iteration (s): 0.30 | learning rate: 1.831E-04 | global batch size: 256 | lm loss: 3.561061E+00 | grad norm: 0.409 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.173 | TFLOPs: 30.36 | +7: iteration 4460/ 21553 | consumed samples: 1141760 | consumed tokens: 2338324480 | elapsed time per iteration (s): 0.30 | learning rate: 1.830E-04 | global batch size: 256 | lm loss: 3.538281E+00 | grad norm: 0.491 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.022 | TFLOPs: 30.35 | +7: iteration 4470/ 21553 | consumed samples: 1144320 | consumed tokens: 2343567360 | elapsed time per iteration (s): 0.30 | learning rate: 1.829E-04 | global batch size: 256 | lm loss: 3.555544E+00 | grad norm: 0.456 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.632 | TFLOPs: 30.37 | +7: iteration 4480/ 21553 | consumed samples: 1146880 | consumed tokens: 2348810240 | elapsed time per iteration (s): 0.30 | learning rate: 1.828E-04 | global batch size: 256 | lm loss: 3.549053E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.219 | TFLOPs: 30.36 | +7: iteration 4490/ 21553 | consumed samples: 1149440 | consumed tokens: 2354053120 | elapsed time per iteration (s): 0.30 | learning rate: 1.828E-04 | global batch size: 256 | lm loss: 3.559713E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.615 | TFLOPs: 30.37 | +7: iteration 4500/ 21553 | consumed samples: 1152000 | consumed tokens: 2359296000 | elapsed time per iteration (s): 0.30 | learning rate: 1.827E-04 | global batch size: 256 | lm loss: 3.548780E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.261 | TFLOPs: 30.36 | +7: iteration 4510/ 21553 | consumed samples: 1154560 | consumed tokens: 2364538880 | elapsed time per iteration (s): 0.30 | learning rate: 1.826E-04 | global batch size: 256 | lm loss: 3.560949E+00 | grad norm: 0.402 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.512 | TFLOPs: 30.37 | +7: iteration 4520/ 21553 | consumed samples: 1157120 | consumed tokens: 2369781760 | elapsed time per iteration (s): 0.30 | learning rate: 1.825E-04 | global batch 
size: 256 | lm loss: 3.541629E+00 | grad norm: 0.454 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.303 | TFLOPs: 30.36 | +7: iteration 4530/ 21553 | consumed samples: 1159680 | consumed tokens: 2375024640 | elapsed time per iteration (s): 0.30 | learning rate: 1.824E-04 | global batch size: 256 | lm loss: 3.537281E+00 | grad norm: 0.468 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.180 | TFLOPs: 30.36 | +7: iteration 4540/ 21553 | consumed samples: 1162240 | consumed tokens: 2380267520 | elapsed time per iteration (s): 0.30 | learning rate: 1.824E-04 | global batch size: 256 | lm loss: 3.541238E+00 | grad norm: 0.525 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.122 | TFLOPs: 30.36 | +7: iteration 4550/ 21553 | consumed samples: 1164800 | consumed tokens: 2385510400 | elapsed time per iteration (s): 0.30 | learning rate: 1.823E-04 | global batch size: 256 | lm loss: 3.546481E+00 | grad norm: 0.470 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.902 | TFLOPs: 30.35 | +7: iteration 4560/ 21553 | consumed samples: 1167360 | consumed tokens: 2390753280 | elapsed time per iteration (s): 0.30 | learning rate: 1.822E-04 | global batch size: 256 | lm loss: 3.536685E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.921 | TFLOPs: 30.35 | +7: iteration 4570/ 21553 | consumed samples: 1169920 | consumed tokens: 2395996160 | elapsed time per iteration (s): 0.30 | learning rate: 1.821E-04 | global batch size: 256 | lm loss: 3.542591E+00 | grad norm: 0.467 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.871 | TFLOPs: 30.35 | +7: iteration 4580/ 21553 | consumed samples: 1172480 | consumed tokens: 
2401239040 | elapsed time per iteration (s): 0.30 | learning rate: 1.820E-04 | global batch size: 256 | lm loss: 3.535985E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.198 | TFLOPs: 30.36 | +7: iteration 4590/ 21553 | consumed samples: 1175040 | consumed tokens: 2406481920 | elapsed time per iteration (s): 0.30 | learning rate: 1.820E-04 | global batch size: 256 | lm loss: 3.541962E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.344 | TFLOPs: 30.36 | +7: iteration 4600/ 21553 | consumed samples: 1177600 | consumed tokens: 2411724800 | elapsed time per iteration (s): 0.30 | learning rate: 1.819E-04 | global batch size: 256 | lm loss: 3.522526E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.617 | TFLOPs: 30.37 | +7: iteration 4610/ 21553 | consumed samples: 1180160 | consumed tokens: 2416967680 | elapsed time per iteration (s): 0.30 | learning rate: 1.818E-04 | global batch size: 256 | lm loss: 3.539164E+00 | grad norm: 0.480 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.551 | TFLOPs: 30.37 | +7: iteration 4620/ 21553 | consumed samples: 1182720 | consumed tokens: 2422210560 | elapsed time per iteration (s): 0.30 | learning rate: 1.817E-04 | global batch size: 256 | lm loss: 3.524503E+00 | grad norm: 0.469 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.331 | TFLOPs: 30.36 | +7: iteration 4630/ 21553 | consumed samples: 1185280 | consumed tokens: 2427453440 | elapsed time per iteration (s): 0.30 | learning rate: 1.816E-04 | global batch size: 256 | lm loss: 3.535016E+00 | grad norm: 0.467 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.290 | 
TFLOPs: 30.36 | +7: iteration 4640/ 21553 | consumed samples: 1187840 | consumed tokens: 2432696320 | elapsed time per iteration (s): 0.30 | learning rate: 1.816E-04 | global batch size: 256 | lm loss: 3.542697E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.253 | TFLOPs: 30.36 | +7: iteration 4650/ 21553 | consumed samples: 1190400 | consumed tokens: 2437939200 | elapsed time per iteration (s): 0.30 | learning rate: 1.815E-04 | global batch size: 256 | lm loss: 3.547315E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.354 | TFLOPs: 30.36 | +7: iteration 4660/ 21553 | consumed samples: 1192960 | consumed tokens: 2443182080 | elapsed time per iteration (s): 0.30 | learning rate: 1.814E-04 | global batch size: 256 | lm loss: 3.517054E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.050 | TFLOPs: 30.35 | +7: iteration 4670/ 21553 | consumed samples: 1195520 | consumed tokens: 2448424960 | elapsed time per iteration (s): 0.30 | learning rate: 1.813E-04 | global batch size: 256 | lm loss: 3.536150E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.452 | TFLOPs: 30.37 | +7: iteration 4680/ 21553 | consumed samples: 1198080 | consumed tokens: 2453667840 | elapsed time per iteration (s): 0.30 | learning rate: 1.812E-04 | global batch size: 256 | lm loss: 3.536631E+00 | grad norm: 0.468 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.137 | TFLOPs: 30.36 | +7: iteration 4690/ 21553 | consumed samples: 1200640 | consumed tokens: 2458910720 | elapsed time per iteration (s): 0.30 | learning rate: 1.812E-04 | global batch size: 256 | lm loss: 3.527846E+00 | grad norm: 0.432 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.383 | TFLOPs: 30.36 | +7: iteration 4700/ 21553 | consumed samples: 1203200 | consumed tokens: 2464153600 | elapsed time per iteration (s): 0.30 | learning rate: 1.811E-04 | global batch size: 256 | lm loss: 3.528874E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.120 | TFLOPs: 30.36 | +7: iteration 4710/ 21553 | consumed samples: 1205760 | consumed tokens: 2469396480 | elapsed time per iteration (s): 0.30 | learning rate: 1.810E-04 | global batch size: 256 | lm loss: 3.530621E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.243 | TFLOPs: 30.36 | +7: iteration 4720/ 21553 | consumed samples: 1208320 | consumed tokens: 2474639360 | elapsed time per iteration (s): 0.30 | learning rate: 1.809E-04 | global batch size: 256 | lm loss: 3.532275E+00 | grad norm: 0.482 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.334 | TFLOPs: 30.36 | +7: iteration 4730/ 21553 | consumed samples: 1210880 | consumed tokens: 2479882240 | elapsed time per iteration (s): 0.30 | learning rate: 1.808E-04 | global batch size: 256 | lm loss: 3.532205E+00 | grad norm: 0.471 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.262 | TFLOPs: 30.36 | +7: iteration 4740/ 21553 | consumed samples: 1213440 | consumed tokens: 2485125120 | elapsed time per iteration (s): 0.30 | learning rate: 1.808E-04 | global batch size: 256 | lm loss: 3.519651E+00 | grad norm: 0.467 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.596 | TFLOPs: 30.37 | +7: iteration 4750/ 21553 | consumed samples: 1216000 | consumed tokens: 2490368000 | elapsed time per iteration (s): 0.30 | learning rate: 1.807E-04 | global batch 
size: 256 | lm loss: 3.525377E+00 | grad norm: 0.476 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.061 | TFLOPs: 30.35 | +7: iteration 4760/ 21553 | consumed samples: 1218560 | consumed tokens: 2495610880 | elapsed time per iteration (s): 0.30 | learning rate: 1.806E-04 | global batch size: 256 | lm loss: 3.518056E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.536 | TFLOPs: 30.37 | +7: iteration 4770/ 21553 | consumed samples: 1221120 | consumed tokens: 2500853760 | elapsed time per iteration (s): 0.30 | learning rate: 1.805E-04 | global batch size: 256 | lm loss: 3.531648E+00 | grad norm: 0.472 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.561 | TFLOPs: 30.37 | +7: iteration 4780/ 21553 | consumed samples: 1223680 | consumed tokens: 2506096640 | elapsed time per iteration (s): 0.30 | learning rate: 1.804E-04 | global batch size: 256 | lm loss: 3.521148E+00 | grad norm: 0.473 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.542 | TFLOPs: 30.37 | +7: iteration 4790/ 21553 | consumed samples: 1226240 | consumed tokens: 2511339520 | elapsed time per iteration (s): 0.30 | learning rate: 1.803E-04 | global batch size: 256 | lm loss: 3.509569E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.520 | TFLOPs: 30.37 | +7: iteration 4800/ 21553 | consumed samples: 1228800 | consumed tokens: 2516582400 | elapsed time per iteration (s): 0.30 | learning rate: 1.803E-04 | global batch size: 256 | lm loss: 3.514326E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.304 | TFLOPs: 30.36 | +7: iteration 4810/ 21553 | consumed samples: 1231360 | consumed tokens: 
2521825280 | elapsed time per iteration (s): 0.30 | learning rate: 1.802E-04 | global batch size: 256 | lm loss: 3.512380E+00 | grad norm: 0.462 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.546 | TFLOPs: 30.37 | +7: iteration 4820/ 21553 | consumed samples: 1233920 | consumed tokens: 2527068160 | elapsed time per iteration (s): 0.29 | learning rate: 1.801E-04 | global batch size: 256 | lm loss: 3.518758E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.042 | TFLOPs: 30.39 | +7: iteration 4830/ 21553 | consumed samples: 1236480 | consumed tokens: 2532311040 | elapsed time per iteration (s): 0.30 | learning rate: 1.800E-04 | global batch size: 256 | lm loss: 3.511021E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.455 | TFLOPs: 30.37 | +7: iteration 4840/ 21553 | consumed samples: 1239040 | consumed tokens: 2537553920 | elapsed time per iteration (s): 0.30 | learning rate: 1.799E-04 | global batch size: 256 | lm loss: 3.517901E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.184 | TFLOPs: 30.36 | +7: iteration 4850/ 21553 | consumed samples: 1241600 | consumed tokens: 2542796800 | elapsed time per iteration (s): 0.30 | learning rate: 1.798E-04 | global batch size: 256 | lm loss: 3.504273E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.109 | TFLOPs: 30.36 | +7: iteration 4860/ 21553 | consumed samples: 1244160 | consumed tokens: 2548039680 | elapsed time per iteration (s): 0.30 | learning rate: 1.798E-04 | global batch size: 256 | lm loss: 3.527311E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.272 | 
TFLOPs: 30.36 | +7: iteration 4870/ 21553 | consumed samples: 1246720 | consumed tokens: 2553282560 | elapsed time per iteration (s): 0.30 | learning rate: 1.797E-04 | global batch size: 256 | lm loss: 3.515278E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.330 | TFLOPs: 30.36 | +7: iteration 4880/ 21553 | consumed samples: 1249280 | consumed tokens: 2558525440 | elapsed time per iteration (s): 0.29 | learning rate: 1.796E-04 | global batch size: 256 | lm loss: 3.499326E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.819 | TFLOPs: 30.38 | +7: iteration 4890/ 21553 | consumed samples: 1251840 | consumed tokens: 2563768320 | elapsed time per iteration (s): 0.30 | learning rate: 1.795E-04 | global batch size: 256 | lm loss: 3.511778E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.532 | TFLOPs: 30.37 | +7: iteration 4900/ 21553 | consumed samples: 1254400 | consumed tokens: 2569011200 | elapsed time per iteration (s): 0.30 | learning rate: 1.794E-04 | global batch size: 256 | lm loss: 3.511217E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.605 | TFLOPs: 30.37 | +7: iteration 4910/ 21553 | consumed samples: 1256960 | consumed tokens: 2574254080 | elapsed time per iteration (s): 0.30 | learning rate: 1.793E-04 | global batch size: 256 | lm loss: 3.515438E+00 | grad norm: 0.520 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.791 | TFLOPs: 30.38 | +7: iteration 4920/ 21553 | consumed samples: 1259520 | consumed tokens: 2579496960 | elapsed time per iteration (s): 0.30 | learning rate: 1.793E-04 | global batch size: 256 | lm loss: 3.508230E+00 | grad norm: 0.418 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.426 | TFLOPs: 30.37 | +7: iteration 4930/ 21553 | consumed samples: 1262080 | consumed tokens: 2584739840 | elapsed time per iteration (s): 0.30 | learning rate: 1.792E-04 | global batch size: 256 | lm loss: 3.514836E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.192 | TFLOPs: 30.36 | +7: iteration 4940/ 21553 | consumed samples: 1264640 | consumed tokens: 2589982720 | elapsed time per iteration (s): 0.29 | learning rate: 1.791E-04 | global batch size: 256 | lm loss: 3.509128E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.160 | TFLOPs: 30.39 | +7: iteration 4950/ 21553 | consumed samples: 1267200 | consumed tokens: 2595225600 | elapsed time per iteration (s): 0.30 | learning rate: 1.790E-04 | global batch size: 256 | lm loss: 3.513600E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.368 | TFLOPs: 30.36 | +7: iteration 4960/ 21553 | consumed samples: 1269760 | consumed tokens: 2600468480 | elapsed time per iteration (s): 0.30 | learning rate: 1.789E-04 | global batch size: 256 | lm loss: 3.507329E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.095 | TFLOPs: 30.35 | +7: iteration 4970/ 21553 | consumed samples: 1272320 | consumed tokens: 2605711360 | elapsed time per iteration (s): 0.30 | learning rate: 1.788E-04 | global batch size: 256 | lm loss: 3.516240E+00 | grad norm: 0.528 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.712 | TFLOPs: 30.38 | +7: iteration 4980/ 21553 | consumed samples: 1274880 | consumed tokens: 2610954240 | elapsed time per iteration (s): 0.30 | learning rate: 1.787E-04 | global batch 
size: 256 | lm loss: 3.497857E+00 | grad norm: 0.612 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.389 | TFLOPs: 30.36 | +7: iteration 4990/ 21553 | consumed samples: 1277440 | consumed tokens: 2616197120 | elapsed time per iteration (s): 0.30 | learning rate: 1.787E-04 | global batch size: 256 | lm loss: 3.511886E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.577 | TFLOPs: 30.37 | +7: iteration 5000/ 21553 | consumed samples: 1280000 | consumed tokens: 2621440000 | elapsed time per iteration (s): 0.30 | learning rate: 1.786E-04 | global batch size: 256 | lm loss: 3.502854E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.462 | TFLOPs: 30.37 | +7: ----------------------------------------------------------------------------------------------- +7: validation loss at iteration 5000 | lm loss value: 3.768303E+00 | lm loss PPL: 4.330652E+01 | +7: ----------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 5000 to checkpoints_146m14b100m +0: [2023-03-13 23:43:23,322] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step5000 is begin to save! +0: [2023-03-13 23:43:23,326] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_01-model_00-model_states.pt... +0: [2023-03-13 23:43:23,412] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_01-model_00-model_states.pt. +0: [2023-03-13 23:43:23,413] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_03-model_00-model_states.pt... 
+0: [2023-03-13 23:43:23,428] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_03-model_00-model_states.pt. +0: [2023-03-13 23:43:23,428] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_04-model_00-model_states.pt... +0: [2023-03-13 23:43:23,443] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_04-model_00-model_states.pt. +0: [2023-03-13 23:43:23,443] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_05-model_00-model_states.pt... +0: [2023-03-13 23:43:23,458] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_05-model_00-model_states.pt. +0: [2023-03-13 23:43:23,458] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_06-model_00-model_states.pt... +0: [2023-03-13 23:43:23,473] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_06-model_00-model_states.pt. +0: [2023-03-13 23:43:23,473] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_07-model_00-model_states.pt... +0: [2023-03-13 23:43:23,488] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_07-model_00-model_states.pt. +0: [2023-03-13 23:43:23,488] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_08-model_00-model_states.pt... +0: [2023-03-13 23:43:23,503] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_08-model_00-model_states.pt. +0: [2023-03-13 23:43:23,503] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_09-model_00-model_states.pt... 
+0: [2023-03-13 23:43:23,518] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_09-model_00-model_states.pt. +0: [2023-03-13 23:43:23,518] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_10-model_00-model_states.pt... +0: [2023-03-13 23:43:23,533] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_10-model_00-model_states.pt. +0: [2023-03-13 23:43:23,533] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_11-model_00-model_states.pt... +0: [2023-03-13 23:43:23,548] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_11-model_00-model_states.pt. +0: [2023-03-13 23:43:23,548] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_12-model_00-model_states.pt... +0: [2023-03-13 23:43:23,563] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_12-model_00-model_states.pt. +0: [2023-03-13 23:43:23,563] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_13-model_00-model_states.pt... +0: [2023-03-13 23:43:23,578] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_13-model_00-model_states.pt. +0: [2023-03-13 23:43:23,579] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_14-model_00-model_states.pt... +0: [2023-03-13 23:43:23,593] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_14-model_00-model_states.pt. +0: [2023-03-13 23:43:23,594] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_15-model_00-model_states.pt... 
+0: [2023-03-13 23:43:23,608] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_15-model_00-model_states.pt. +0: [2023-03-13 23:43:23,609] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_16-model_00-model_states.pt... +0: [2023-03-13 23:43:23,623] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_16-model_00-model_states.pt. +0: [2023-03-13 23:43:23,624] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_17-model_00-model_states.pt... +0: [2023-03-13 23:43:23,638] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_17-model_00-model_states.pt. +0: [2023-03-13 23:43:23,639] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/layer_19-model_00-model_states.pt... +0: [2023-03-13 23:43:23,640] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/layer_19-model_00-model_states.pt. +0: [2023-03-13 23:43:23,640] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step5000/mp_rank_00_model_states.pt +0: [2023-03-13 23:43:23,640] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/mp_rank_00_model_states.pt... +0: [2023-03-13 23:43:23,642] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/mp_rank_00_model_states.pt. +0: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... 
+0: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... 
+4: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... 
+6: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... 
+3: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... 
+4: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... 
+5: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:43:23,660] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:43:23,694] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:43:23,695] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. 
+2: [2023-03-13 23:43:23,695] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:43:23,695] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2023-03-13 23:43:23,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:43:23,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:43:23,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +5: [2023-03-13 23:43:23,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +0: [2023-03-13 23:43:23,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2023-03-13 23:43:23,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2023-03-13 23:43:23,696] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:43:23,696] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-13 23:43:23,696] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2023-03-13 23:43:23,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:43:23,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:43:23,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-13 23:43:23,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-13 23:43:23,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2023-03-13 23:43:23,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. 
+1: [2023-03-13 23:43:23,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-13 23:43:23,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:43:23,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-13 23:43:23,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:43:23,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-13 23:43:23,699] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:43:23,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:43:23,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:43:23,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +6: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! 
+6: [2023-03-13 23:43:23,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:43:23,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2023-03-13 23:43:23,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2023-03-13 23:43:23,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:43:23,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-13 23:43:23,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +7: [2023-03-13 23:43:23,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:43:23,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-13 23:43:23,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2023-03-13 23:43:23,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:43:23,704] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-13 23:43:23,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2023-03-13 23:43:23,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:43:23,704] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-13 23:43:23,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2023-03-13 23:43:23,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:43:23,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-13 23:43:23,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:43:23,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-13 23:43:23,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2023-03-13 23:43:23,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:43:23,701] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2023-03-13 23:43:23,707] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +5: [2023-03-13 23:43:23,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:43:23,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-13 23:43:23,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2023-03-13 23:43:23,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +5: [2023-03-13 23:43:23,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:43:23,707] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-13 23:43:23,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. 
+2: [2023-03-13 23:43:23,695] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +4: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:43:23,695] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-13 23:43:23,695] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +2: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2023-03-13 23:43:23,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +3: [2023-03-13 23:43:23,695] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +2: [2023-03-13 23:43:23,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:43:23,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! 
+2: [2023-03-13 23:43:23,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +4: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2023-03-13 23:43:23,695] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2023-03-13 23:43:23,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2023-03-13 23:43:23,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +3: [2023-03-13 23:43:23,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! 
+3: [2023-03-13 23:43:23,697] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +2: [2023-03-13 23:43:23,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +4: [2023-03-13 23:43:23,699] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:43:23,697] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2023-03-13 23:43:23,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +3: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! 
+3: [2023-03-13 23:43:23,698] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +2: [2023-03-13 23:43:23,700] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +4: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:43:23,698] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2023-03-13 23:43:23,700] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2023-03-13 23:43:23,701] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:43:23,702] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. 
+3: [2023-03-13 23:43:23,701] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +2: [2023-03-13 23:43:23,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +4: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2023-03-13 23:43:23,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2023-03-13 23:43:23,701] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-13 23:43:23,701] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:43:23,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:43:23,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:43:23,701] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! 
+3: [2023-03-13 23:43:23,702] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +2: [2023-03-13 23:43:23,707] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2023-03-13 23:43:23,707] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +4: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2023-03-13 23:43:23,702] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2023-03-13 23:43:23,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +2: [2023-03-13 23:43:23,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +4: [2023-03-13 23:43:23,701] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +3: [2023-03-13 23:43:23,703] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:43:23,703] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-13 23:43:23,703] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2023-03-13 23:43:23,709] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. 
+6: [2023-03-13 23:43:23,709] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:43:23,709] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:43:23,709] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:43:23,709] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-13 23:43:23,709] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-13 23:43:23,709] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-13 23:43:23,710] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-13 23:43:23,710] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2023-03-13 23:43:23,710] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2023-03-13 23:43:23,710] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +6: [2023-03-13 23:43:23,710] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2023-03-13 23:43:23,713] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. 
+0: [2023-03-13 23:43:23,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:43:23,714] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-13 23:43:23,714] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2023-03-13 23:43:23,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:43:23,714] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-13 23:43:23,714] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2023-03-13 23:43:23,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:43:23,715] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-13 23:43:23,715] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +0: [2023-03-13 23:43:23,715] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:43:23,715] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-13 23:43:23,715] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! 
+0: [2023-03-13 23:43:23,726] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-13 23:43:23,726] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2023-03-13 23:43:23,734] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:43:23,734] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-13 23:43:23,734] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! +1: [2023-03-13 23:43:23,739] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:43:23,739] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step5000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-13 23:43:23,739] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now! 
+0: successfully saved checkpoint at iteration 5000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 424.20 +7: iteration 5010/ 21553 | consumed samples: 1282560 | consumed tokens: 2626682880 | elapsed time per iteration (s): 0.35 | learning rate: 1.785E-04 | global batch size: 256 | lm loss: 3.504597E+00 | grad norm: 0.468 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 737.868 | TFLOPs: 25.83 | +7: iteration 5020/ 21553 | consumed samples: 1285120 | consumed tokens: 2631925760 | elapsed time per iteration (s): 0.30 | learning rate: 1.784E-04 | global batch size: 256 | lm loss: 3.497611E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.890 | TFLOPs: 30.35 | +7: iteration 5030/ 21553 | consumed samples: 1287680 | consumed tokens: 2637168640 | elapsed time per iteration (s): 0.30 | learning rate: 1.783E-04 | global batch size: 256 | lm loss: 3.502938E+00 | grad norm: 0.461 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.842 | TFLOPs: 30.35 | +7: iteration 5040/ 21553 | consumed samples: 1290240 | consumed tokens: 2642411520 | elapsed time per iteration (s): 0.30 | learning rate: 1.782E-04 | global batch size: 256 | lm loss: 3.491778E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.877 | TFLOPs: 30.35 | +7: iteration 5050/ 21553 | consumed samples: 1292800 | consumed tokens: 2647654400 | elapsed time per iteration (s): 0.30 | learning rate: 1.781E-04 | global batch size: 256 | lm loss: 3.503128E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.927 | TFLOPs: 30.35 | +7: iteration 5060/ 21553 | consumed samples: 1295360 | consumed tokens: 2652897280 | elapsed time per iteration (s): 0.30 | learning rate: 
1.781E-04 | global batch size: 256 | lm loss: 3.492990E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.949 | TFLOPs: 30.35 | +7: iteration 5070/ 21553 | consumed samples: 1297920 | consumed tokens: 2658140160 | elapsed time per iteration (s): 0.30 | learning rate: 1.780E-04 | global batch size: 256 | lm loss: 3.496559E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.660 | TFLOPs: 30.34 | +7: iteration 5080/ 21553 | consumed samples: 1300480 | consumed tokens: 2663383040 | elapsed time per iteration (s): 0.30 | learning rate: 1.779E-04 | global batch size: 256 | lm loss: 3.483953E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.729 | TFLOPs: 30.38 | +7: iteration 5090/ 21553 | consumed samples: 1303040 | consumed tokens: 2668625920 | elapsed time per iteration (s): 0.30 | learning rate: 1.778E-04 | global batch size: 256 | lm loss: 3.491275E+00 | grad norm: 0.385 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.338 | TFLOPs: 30.29 | +7: iteration 5100/ 21553 | consumed samples: 1305600 | consumed tokens: 2673868800 | elapsed time per iteration (s): 0.30 | learning rate: 1.777E-04 | global batch size: 256 | lm loss: 3.490646E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.250 | TFLOPs: 30.36 | +7: iteration 5110/ 21553 | consumed samples: 1308160 | consumed tokens: 2679111680 | elapsed time per iteration (s): 0.30 | learning rate: 1.776E-04 | global batch size: 256 | lm loss: 3.486938E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.405 | TFLOPs: 30.30 | +7: iteration 5120/ 21553 | consumed samples: 1310720 | 
consumed tokens: 2684354560 | elapsed time per iteration (s): 0.30 | learning rate: 1.775E-04 | global batch size: 256 | lm loss: 3.480979E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.001 | TFLOPs: 30.35 | +7: iteration 5130/ 21553 | consumed samples: 1313280 | consumed tokens: 2689597440 | elapsed time per iteration (s): 0.30 | learning rate: 1.775E-04 | global batch size: 256 | lm loss: 3.489679E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.826 | TFLOPs: 30.35 | +7: iteration 5140/ 21553 | consumed samples: 1315840 | consumed tokens: 2694840320 | elapsed time per iteration (s): 0.30 | learning rate: 1.774E-04 | global batch size: 256 | lm loss: 3.472094E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.145 | TFLOPs: 30.36 | +7: iteration 5150/ 21553 | consumed samples: 1318400 | consumed tokens: 2700083200 | elapsed time per iteration (s): 0.30 | learning rate: 1.773E-04 | global batch size: 256 | lm loss: 3.488367E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.315 | TFLOPs: 30.22 | +7: iteration 5160/ 21553 | consumed samples: 1320960 | consumed tokens: 2705326080 | elapsed time per iteration (s): 0.30 | learning rate: 1.772E-04 | global batch size: 256 | lm loss: 3.485703E+00 | grad norm: 0.477 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.735 | TFLOPs: 30.31 | +7: iteration 5170/ 21553 | consumed samples: 1323520 | consumed tokens: 2710568960 | elapsed time per iteration (s): 0.30 | learning rate: 1.771E-04 | global batch size: 256 | lm loss: 3.490656E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 
866.442 | TFLOPs: 30.33 | +7: iteration 5180/ 21553 | consumed samples: 1326080 | consumed tokens: 2715811840 | elapsed time per iteration (s): 0.30 | learning rate: 1.770E-04 | global batch size: 256 | lm loss: 3.485231E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.071 | TFLOPs: 30.25 | +7: iteration 5190/ 21553 | consumed samples: 1328640 | consumed tokens: 2721054720 | elapsed time per iteration (s): 0.30 | learning rate: 1.769E-04 | global batch size: 256 | lm loss: 3.488507E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.677 | TFLOPs: 30.34 | +7: iteration 5200/ 21553 | consumed samples: 1331200 | consumed tokens: 2726297600 | elapsed time per iteration (s): 0.30 | learning rate: 1.768E-04 | global batch size: 256 | lm loss: 3.485680E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.764 | TFLOPs: 30.34 | +7: iteration 5210/ 21553 | consumed samples: 1333760 | consumed tokens: 2731540480 | elapsed time per iteration (s): 0.30 | learning rate: 1.767E-04 | global batch size: 256 | lm loss: 3.487176E+00 | grad norm: 0.402 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.416 | TFLOPs: 30.37 | +7: iteration 5220/ 21553 | consumed samples: 1336320 | consumed tokens: 2736783360 | elapsed time per iteration (s): 0.30 | learning rate: 1.767E-04 | global batch size: 256 | lm loss: 3.464733E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.731 | TFLOPs: 30.27 | +7: iteration 5230/ 21553 | consumed samples: 1338880 | consumed tokens: 2742026240 | elapsed time per iteration (s): 0.30 | learning rate: 1.766E-04 | global batch size: 256 | lm loss: 3.466660E+00 | grad norm: 0.481 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.846 | TFLOPs: 30.28 | +7: iteration 5240/ 21553 | consumed samples: 1341440 | consumed tokens: 2747269120 | elapsed time per iteration (s): 0.30 | learning rate: 1.765E-04 | global batch size: 256 | lm loss: 3.487032E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.719 | TFLOPs: 30.24 | +7: iteration 5250/ 21553 | consumed samples: 1344000 | consumed tokens: 2752512000 | elapsed time per iteration (s): 0.30 | learning rate: 1.764E-04 | global batch size: 256 | lm loss: 3.484139E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.261 | TFLOPs: 30.36 | +7: iteration 5260/ 21553 | consumed samples: 1346560 | consumed tokens: 2757754880 | elapsed time per iteration (s): 0.30 | learning rate: 1.763E-04 | global batch size: 256 | lm loss: 3.481749E+00 | grad norm: 0.401 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.063 | TFLOPs: 30.35 | +7: iteration 5270/ 21553 | consumed samples: 1349120 | consumed tokens: 2762997760 | elapsed time per iteration (s): 0.30 | learning rate: 1.762E-04 | global batch size: 256 | lm loss: 3.478778E+00 | grad norm: 0.394 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.831 | TFLOPs: 30.35 | +7: iteration 5280/ 21553 | consumed samples: 1351680 | consumed tokens: 2768240640 | elapsed time per iteration (s): 0.30 | learning rate: 1.761E-04 | global batch size: 256 | lm loss: 3.474100E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.662 | TFLOPs: 30.30 | +7: iteration 5290/ 21553 | consumed samples: 1354240 | consumed tokens: 2773483520 | elapsed time per iteration (s): 0.30 | learning rate: 1.760E-04 | 
global batch size: 256 | lm loss: 3.480971E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.369 | TFLOPs: 30.29 | +7: iteration 5300/ 21553 | consumed samples: 1356800 | consumed tokens: 2778726400 | elapsed time per iteration (s): 0.30 | learning rate: 1.759E-04 | global batch size: 256 | lm loss: 3.471391E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.534 | TFLOPs: 30.30 | +7: iteration 5310/ 21553 | consumed samples: 1359360 | consumed tokens: 2783969280 | elapsed time per iteration (s): 0.30 | learning rate: 1.758E-04 | global batch size: 256 | lm loss: 3.470039E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.260 | TFLOPs: 30.33 | +7: iteration 5320/ 21553 | consumed samples: 1361920 | consumed tokens: 2789212160 | elapsed time per iteration (s): 0.30 | learning rate: 1.758E-04 | global batch size: 256 | lm loss: 3.468952E+00 | grad norm: 0.472 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.452 | TFLOPs: 30.33 | +7: iteration 5330/ 21553 | consumed samples: 1364480 | consumed tokens: 2794455040 | elapsed time per iteration (s): 0.30 | learning rate: 1.757E-04 | global batch size: 256 | lm loss: 3.476561E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.280 | TFLOPs: 30.36 | +7: iteration 5340/ 21553 | consumed samples: 1367040 | consumed tokens: 2799697920 | elapsed time per iteration (s): 0.30 | learning rate: 1.756E-04 | global batch size: 256 | lm loss: 3.467901E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.399 | TFLOPs: 30.16 | +7: iteration 5350/ 21553 | consumed samples: 1369600 | consumed 
tokens: 2804940800 | elapsed time per iteration (s): 0.30 | learning rate: 1.755E-04 | global batch size: 256 | lm loss: 3.450629E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.446 | TFLOPs: 30.37 | +7: iteration 5360/ 21553 | consumed samples: 1372160 | consumed tokens: 2810183680 | elapsed time per iteration (s): 0.30 | learning rate: 1.754E-04 | global batch size: 256 | lm loss: 3.458858E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.929 | TFLOPs: 30.35 | +7: iteration 5370/ 21553 | consumed samples: 1374720 | consumed tokens: 2815426560 | elapsed time per iteration (s): 0.30 | learning rate: 1.753E-04 | global batch size: 256 | lm loss: 3.490764E+00 | grad norm: 0.462 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.442 | TFLOPs: 30.33 | +7: iteration 5380/ 21553 | consumed samples: 1377280 | consumed tokens: 2820669440 | elapsed time per iteration (s): 0.30 | learning rate: 1.752E-04 | global batch size: 256 | lm loss: 3.466106E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.755 | TFLOPs: 30.34 | +7: iteration 5390/ 21553 | consumed samples: 1379840 | consumed tokens: 2825912320 | elapsed time per iteration (s): 0.30 | learning rate: 1.751E-04 | global batch size: 256 | lm loss: 3.474661E+00 | grad norm: 0.405 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.570 | TFLOPs: 30.34 | +7: iteration 5400/ 21553 | consumed samples: 1382400 | consumed tokens: 2831155200 | elapsed time per iteration (s): 0.30 | learning rate: 1.750E-04 | global batch size: 256 | lm loss: 3.468237E+00 | grad norm: 0.463 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.494 
| TFLOPs: 30.30 | +7: iteration 5410/ 21553 | consumed samples: 1384960 | consumed tokens: 2836398080 | elapsed time per iteration (s): 0.30 | learning rate: 1.749E-04 | global batch size: 256 | lm loss: 3.467458E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.854 | TFLOPs: 30.35 | +7: iteration 5420/ 21553 | consumed samples: 1387520 | consumed tokens: 2841640960 | elapsed time per iteration (s): 0.30 | learning rate: 1.748E-04 | global batch size: 256 | lm loss: 3.450253E+00 | grad norm: 0.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.929 | TFLOPs: 30.35 | +7: iteration 5430/ 21553 | consumed samples: 1390080 | consumed tokens: 2846883840 | elapsed time per iteration (s): 0.30 | learning rate: 1.748E-04 | global batch size: 256 | lm loss: 3.474409E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.120 | TFLOPs: 30.36 | +7: iteration 5440/ 21553 | consumed samples: 1392640 | consumed tokens: 2852126720 | elapsed time per iteration (s): 0.30 | learning rate: 1.747E-04 | global batch size: 256 | lm loss: 3.446040E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.365 | TFLOPs: 30.36 | +7: iteration 5450/ 21553 | consumed samples: 1395200 | consumed tokens: 2857369600 | elapsed time per iteration (s): 0.30 | learning rate: 1.746E-04 | global batch size: 256 | lm loss: 3.469004E+00 | grad norm: 0.468 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.046 | TFLOPs: 30.35 | +7: iteration 5460/ 21553 | consumed samples: 1397760 | consumed tokens: 2862612480 | elapsed time per iteration (s): 0.30 | learning rate: 1.745E-04 | global batch size: 256 | lm loss: 3.452534E+00 | grad norm: 0.429 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.265 | TFLOPs: 30.36 | +7: iteration 5470/ 21553 | consumed samples: 1400320 | consumed tokens: 2867855360 | elapsed time per iteration (s): 0.30 | learning rate: 1.744E-04 | global batch size: 256 | lm loss: 3.467144E+00 | grad norm: 0.402 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.388 | TFLOPs: 30.36 | +7: iteration 5480/ 21553 | consumed samples: 1402880 | consumed tokens: 2873098240 | elapsed time per iteration (s): 0.30 | learning rate: 1.743E-04 | global batch size: 256 | lm loss: 3.458614E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.127 | TFLOPs: 30.32 | +7: iteration 5490/ 21553 | consumed samples: 1405440 | consumed tokens: 2878341120 | elapsed time per iteration (s): 0.30 | learning rate: 1.742E-04 | global batch size: 256 | lm loss: 3.457297E+00 | grad norm: 0.539 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.663 | TFLOPs: 30.34 | +7: iteration 5500/ 21553 | consumed samples: 1408000 | consumed tokens: 2883584000 | elapsed time per iteration (s): 0.30 | learning rate: 1.741E-04 | global batch size: 256 | lm loss: 3.460509E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.449 | TFLOPs: 30.30 | +7: iteration 5510/ 21553 | consumed samples: 1410560 | consumed tokens: 2888826880 | elapsed time per iteration (s): 0.30 | learning rate: 1.740E-04 | global batch size: 256 | lm loss: 3.459804E+00 | grad norm: 0.391 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.063 | TFLOPs: 30.32 | +7: iteration 5520/ 21553 | consumed samples: 1413120 | consumed tokens: 2894069760 | elapsed time per iteration (s): 0.30 | learning rate: 1.739E-04 | global batch 
size: 256 | lm loss: 3.445074E+00 | grad norm: 0.465 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.270 | TFLOPs: 30.33 | +7: iteration 5530/ 21553 | consumed samples: 1415680 | consumed tokens: 2899312640 | elapsed time per iteration (s): 0.30 | learning rate: 1.738E-04 | global batch size: 256 | lm loss: 3.464245E+00 | grad norm: 0.479 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.341 | TFLOPs: 30.33 | +7: iteration 5540/ 21553 | consumed samples: 1418240 | consumed tokens: 2904555520 | elapsed time per iteration (s): 0.30 | learning rate: 1.737E-04 | global batch size: 256 | lm loss: 3.456888E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.721 | TFLOPs: 30.34 | +7: iteration 5550/ 21553 | consumed samples: 1420800 | consumed tokens: 2909798400 | elapsed time per iteration (s): 0.30 | learning rate: 1.736E-04 | global batch size: 256 | lm loss: 3.465444E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.426 | TFLOPs: 30.33 | +7: iteration 5560/ 21553 | consumed samples: 1423360 | consumed tokens: 2915041280 | elapsed time per iteration (s): 0.30 | learning rate: 1.735E-04 | global batch size: 256 | lm loss: 3.442826E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.224 | TFLOPs: 30.36 | +7: iteration 5570/ 21553 | consumed samples: 1425920 | consumed tokens: 2920284160 | elapsed time per iteration (s): 0.30 | learning rate: 1.735E-04 | global batch size: 256 | lm loss: 3.445588E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.043 | TFLOPs: 30.32 | +7: iteration 5580/ 21553 | consumed samples: 1428480 | consumed tokens: 
2925527040 | elapsed time per iteration (s): 0.30 | learning rate: 1.734E-04 | global batch size: 256 | lm loss: 3.464400E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.099 | TFLOPs: 30.32 | +7: iteration 5590/ 21553 | consumed samples: 1431040 | consumed tokens: 2930769920 | elapsed time per iteration (s): 0.30 | learning rate: 1.733E-04 | global batch size: 256 | lm loss: 3.460657E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.531 | TFLOPs: 30.12 | +7: iteration 5600/ 21553 | consumed samples: 1433600 | consumed tokens: 2936012800 | elapsed time per iteration (s): 0.30 | learning rate: 1.732E-04 | global batch size: 256 | lm loss: 3.448066E+00 | grad norm: 0.402 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.369 | TFLOPs: 30.33 | +7: iteration 5610/ 21553 | consumed samples: 1436160 | consumed tokens: 2941255680 | elapsed time per iteration (s): 0.30 | learning rate: 1.731E-04 | global batch size: 256 | lm loss: 3.449020E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.539 | TFLOPs: 30.30 | +7: iteration 5620/ 21553 | consumed samples: 1438720 | consumed tokens: 2946498560 | elapsed time per iteration (s): 0.30 | learning rate: 1.730E-04 | global batch size: 256 | lm loss: 3.436217E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.637 | TFLOPs: 30.30 | +7: iteration 5630/ 21553 | consumed samples: 1441280 | consumed tokens: 2951741440 | elapsed time per iteration (s): 0.30 | learning rate: 1.729E-04 | global batch size: 256 | lm loss: 3.430839E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.171 | 
TFLOPs: 30.32 | +7: iteration 5640/ 21553 | consumed samples: 1443840 | consumed tokens: 2956984320 | elapsed time per iteration (s): 0.30 | learning rate: 1.728E-04 | global batch size: 256 | lm loss: 3.449997E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.703 | TFLOPs: 30.31 | +7: iteration 5650/ 21553 | consumed samples: 1446400 | consumed tokens: 2962227200 | elapsed time per iteration (s): 0.30 | learning rate: 1.727E-04 | global batch size: 256 | lm loss: 3.430530E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.366 | TFLOPs: 30.29 | +7: iteration 5660/ 21553 | consumed samples: 1448960 | consumed tokens: 2967470080 | elapsed time per iteration (s): 0.30 | learning rate: 1.726E-04 | global batch size: 256 | lm loss: 3.436266E+00 | grad norm: 0.406 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.429 | TFLOPs: 30.30 | +7: iteration 5670/ 21553 | consumed samples: 1451520 | consumed tokens: 2972712960 | elapsed time per iteration (s): 0.30 | learning rate: 1.725E-04 | global batch size: 256 | lm loss: 3.447434E+00 | grad norm: 0.480 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.204 | TFLOPs: 30.29 | +7: iteration 5680/ 21553 | consumed samples: 1454080 | consumed tokens: 2977955840 | elapsed time per iteration (s): 0.30 | learning rate: 1.724E-04 | global batch size: 256 | lm loss: 3.445545E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.437 | TFLOPs: 30.30 | +7: iteration 5690/ 21553 | consumed samples: 1456640 | consumed tokens: 2983198720 | elapsed time per iteration (s): 0.30 | learning rate: 1.723E-04 | global batch size: 256 | lm loss: 3.440984E+00 | grad norm: 0.432 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.779 | TFLOPs: 30.17 | +7: iteration 5700/ 21553 | consumed samples: 1459200 | consumed tokens: 2988441600 | elapsed time per iteration (s): 0.30 | learning rate: 1.722E-04 | global batch size: 256 | lm loss: 3.454313E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.955 | TFLOPs: 30.31 | +7: iteration 5710/ 21553 | consumed samples: 1461760 | consumed tokens: 2993684480 | elapsed time per iteration (s): 0.30 | learning rate: 1.721E-04 | global batch size: 256 | lm loss: 3.446544E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.357 | TFLOPs: 30.33 | +7: iteration 5720/ 21553 | consumed samples: 1464320 | consumed tokens: 2998927360 | elapsed time per iteration (s): 0.30 | learning rate: 1.720E-04 | global batch size: 256 | lm loss: 3.441927E+00 | grad norm: 0.466 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.811 | TFLOPs: 30.31 | +7: iteration 5730/ 21553 | consumed samples: 1466880 | consumed tokens: 3004170240 | elapsed time per iteration (s): 0.30 | learning rate: 1.719E-04 | global batch size: 256 | lm loss: 3.433743E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.374 | TFLOPs: 30.33 | +7: iteration 5740/ 21553 | consumed samples: 1469440 | consumed tokens: 3009413120 | elapsed time per iteration (s): 0.30 | learning rate: 1.718E-04 | global batch size: 256 | lm loss: 3.436022E+00 | grad norm: 0.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.192 | TFLOPs: 30.32 | +7: iteration 5750/ 21553 | consumed samples: 1472000 | consumed tokens: 3014656000 | elapsed time per iteration (s): 0.30 | learning rate: 1.717E-04 | global batch 
size: 256 | lm loss: 3.422438E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.064 | TFLOPs: 30.32 | +7: iteration 5760/ 21553 | consumed samples: 1474560 | consumed tokens: 3019898880 | elapsed time per iteration (s): 0.30 | learning rate: 1.716E-04 | global batch size: 256 | lm loss: 3.436783E+00 | grad norm: 0.486 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.756 | TFLOPs: 30.24 | +7: iteration 5770/ 21553 | consumed samples: 1477120 | consumed tokens: 3025141760 | elapsed time per iteration (s): 0.30 | learning rate: 1.715E-04 | global batch size: 256 | lm loss: 3.454999E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.968 | TFLOPs: 30.25 | +7: iteration 5780/ 21553 | consumed samples: 1479680 | consumed tokens: 3030384640 | elapsed time per iteration (s): 0.30 | learning rate: 1.714E-04 | global batch size: 256 | lm loss: 3.426170E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.715 | TFLOPs: 30.24 | +7: iteration 5790/ 21553 | consumed samples: 1482240 | consumed tokens: 3035627520 | elapsed time per iteration (s): 0.30 | learning rate: 1.714E-04 | global batch size: 256 | lm loss: 3.438084E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.100 | TFLOPs: 30.25 | +7: iteration 5800/ 21553 | consumed samples: 1484800 | consumed tokens: 3040870400 | elapsed time per iteration (s): 0.30 | learning rate: 1.713E-04 | global batch size: 256 | lm loss: 3.428118E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.106 | TFLOPs: 30.28 | +7: iteration 5810/ 21553 | consumed samples: 1487360 | consumed tokens: 
3046113280 | elapsed time per iteration (s): 0.30 | learning rate: 1.712E-04 | global batch size: 256 | lm loss: 3.437422E+00 | grad norm: 0.467 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.182 | TFLOPs: 30.32 | +7: iteration 5820/ 21553 | consumed samples: 1489920 | consumed tokens: 3051356160 | elapsed time per iteration (s): 0.30 | learning rate: 1.711E-04 | global batch size: 256 | lm loss: 3.425530E+00 | grad norm: 0.463 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.728 | TFLOPs: 30.34 | +7: iteration 5830/ 21553 | consumed samples: 1492480 | consumed tokens: 3056599040 | elapsed time per iteration (s): 0.30 | learning rate: 1.710E-04 | global batch size: 256 | lm loss: 3.422105E+00 | grad norm: 0.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.892 | TFLOPs: 30.31 | +7: iteration 5840/ 21553 | consumed samples: 1495040 | consumed tokens: 3061841920 | elapsed time per iteration (s): 0.30 | learning rate: 1.709E-04 | global batch size: 256 | lm loss: 3.426194E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.576 | TFLOPs: 30.34 | +7: iteration 5850/ 21553 | consumed samples: 1497600 | consumed tokens: 3067084800 | elapsed time per iteration (s): 0.30 | learning rate: 1.708E-04 | global batch size: 256 | lm loss: 3.425577E+00 | grad norm: 0.396 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.280 | TFLOPs: 30.33 | +7: iteration 5860/ 21553 | consumed samples: 1500160 | consumed tokens: 3072327680 | elapsed time per iteration (s): 0.30 | learning rate: 1.707E-04 | global batch size: 256 | lm loss: 3.429363E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.735 | 
TFLOPs: 30.34 | +7: iteration 5870/ 21553 | consumed samples: 1502720 | consumed tokens: 3077570560 | elapsed time per iteration (s): 0.30 | learning rate: 1.706E-04 | global batch size: 256 | lm loss: 3.429609E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.814 | TFLOPs: 30.31 | +7: iteration 5880/ 21553 | consumed samples: 1505280 | consumed tokens: 3082813440 | elapsed time per iteration (s): 0.30 | learning rate: 1.705E-04 | global batch size: 256 | lm loss: 3.416947E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.223 | TFLOPs: 30.32 | +7: iteration 5890/ 21553 | consumed samples: 1507840 | consumed tokens: 3088056320 | elapsed time per iteration (s): 0.30 | learning rate: 1.704E-04 | global batch size: 256 | lm loss: 3.423700E+00 | grad norm: 0.488 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.029 | TFLOPs: 30.35 | +7: iteration 5900/ 21553 | consumed samples: 1510400 | consumed tokens: 3093299200 | elapsed time per iteration (s): 0.30 | learning rate: 1.703E-04 | global batch size: 256 | lm loss: 3.425647E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.973 | TFLOPs: 30.35 | +7: iteration 5910/ 21553 | consumed samples: 1512960 | consumed tokens: 3098542080 | elapsed time per iteration (s): 0.30 | learning rate: 1.702E-04 | global batch size: 256 | lm loss: 3.425323E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.247 | TFLOPs: 30.36 | +7: iteration 5920/ 21553 | consumed samples: 1515520 | consumed tokens: 3103784960 | elapsed time per iteration (s): 0.30 | learning rate: 1.701E-04 | global batch size: 256 | lm loss: 3.424964E+00 | grad norm: 0.435 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.858 | TFLOPs: 30.35 | +7: iteration 5930/ 21553 | consumed samples: 1518080 | consumed tokens: 3109027840 | elapsed time per iteration (s): 0.30 | learning rate: 1.700E-04 | global batch size: 256 | lm loss: 3.415997E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.430 | TFLOPs: 30.33 | +7: iteration 5940/ 21553 | consumed samples: 1520640 | consumed tokens: 3114270720 | elapsed time per iteration (s): 0.30 | learning rate: 1.699E-04 | global batch size: 256 | lm loss: 3.423136E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.133 | TFLOPs: 30.36 | +7: iteration 5950/ 21553 | consumed samples: 1523200 | consumed tokens: 3119513600 | elapsed time per iteration (s): 0.29 | learning rate: 1.698E-04 | global batch size: 256 | lm loss: 3.418628E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.130 | TFLOPs: 30.39 | +7: iteration 5960/ 21553 | consumed samples: 1525760 | consumed tokens: 3124756480 | elapsed time per iteration (s): 0.30 | learning rate: 1.697E-04 | global batch size: 256 | lm loss: 3.428139E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.675 | TFLOPs: 30.34 | +7: iteration 5970/ 21553 | consumed samples: 1528320 | consumed tokens: 3129999360 | elapsed time per iteration (s): 0.30 | learning rate: 1.696E-04 | global batch size: 256 | lm loss: 3.416125E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.535 | TFLOPs: 30.33 | +7: iteration 5980/ 21553 | consumed samples: 1530880 | consumed tokens: 3135242240 | elapsed time per iteration (s): 0.30 | learning rate: 1.695E-04 | global batch 
size: 256 | lm loss: 3.436431E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.292 | TFLOPs: 30.33 | +7: iteration 5990/ 21553 | consumed samples: 1533440 | consumed tokens: 3140485120 | elapsed time per iteration (s): 0.30 | learning rate: 1.694E-04 | global batch size: 256 | lm loss: 3.426907E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.165 | TFLOPs: 30.32 | +0: [2023-03-13 23:48:19,316] [INFO] [logging.py:68:log_dist] [Rank 0] step=6000, skipped=0, lr=[0.00016928570742907802, 0.00016928570742907802, 0.00016928570742907802], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 6000/ 21553 | consumed samples: 1536000 | consumed tokens: 3145728000 | elapsed time per iteration (s): 0.30 | learning rate: 1.693E-04 | global batch size: 256 | lm loss: 3.426998E+00 | grad norm: 0.459 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.728 | TFLOPs: 30.34 | +0: steps: 6000 loss: 3.4472 iter time (s): 0.293 samples/sec: 872.699 +7: ----------------------------------------------------------------------------------------------- +7: validation loss at iteration 6000 | lm loss value: 3.773229E+00 | lm loss PPL: 4.352038E+01 | +7: ----------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 6000 to checkpoints_146m14b100m +0: [2023-03-13 23:48:19,436] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step6000 is begin to save! +0: [2023-03-13 23:48:19,440] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_01-model_00-model_states.pt... +0: [2023-03-13 23:48:19,523] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_01-model_00-model_states.pt. 
+0: [2023-03-13 23:48:19,524] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_03-model_00-model_states.pt... +0: [2023-03-13 23:48:19,539] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_03-model_00-model_states.pt. +0: [2023-03-13 23:48:19,539] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_04-model_00-model_states.pt... +0: [2023-03-13 23:48:19,554] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_04-model_00-model_states.pt. +0: [2023-03-13 23:48:19,554] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_05-model_00-model_states.pt... +0: [2023-03-13 23:48:19,569] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_05-model_00-model_states.pt. +0: [2023-03-13 23:48:19,569] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_06-model_00-model_states.pt... +0: [2023-03-13 23:48:19,584] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_06-model_00-model_states.pt. +0: [2023-03-13 23:48:19,584] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_07-model_00-model_states.pt... +0: [2023-03-13 23:48:19,599] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_07-model_00-model_states.pt. +0: [2023-03-13 23:48:19,599] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_08-model_00-model_states.pt... +0: [2023-03-13 23:48:19,614] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_08-model_00-model_states.pt. 
+0: [2023-03-13 23:48:19,614] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_09-model_00-model_states.pt... +0: [2023-03-13 23:48:19,629] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_09-model_00-model_states.pt. +0: [2023-03-13 23:48:19,629] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_10-model_00-model_states.pt... +0: [2023-03-13 23:48:19,644] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_10-model_00-model_states.pt. +0: [2023-03-13 23:48:19,644] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_11-model_00-model_states.pt... +0: [2023-03-13 23:48:19,659] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_11-model_00-model_states.pt. +0: [2023-03-13 23:48:19,659] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_12-model_00-model_states.pt... +0: [2023-03-13 23:48:19,674] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_12-model_00-model_states.pt. +0: [2023-03-13 23:48:19,674] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_13-model_00-model_states.pt... +0: [2023-03-13 23:48:19,689] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_13-model_00-model_states.pt. +0: [2023-03-13 23:48:19,689] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_14-model_00-model_states.pt... +0: [2023-03-13 23:48:19,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_14-model_00-model_states.pt. 
+0: [2023-03-13 23:48:19,704] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_15-model_00-model_states.pt... +0: [2023-03-13 23:48:19,719] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_15-model_00-model_states.pt. +0: [2023-03-13 23:48:19,719] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_16-model_00-model_states.pt... +0: [2023-03-13 23:48:19,734] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_16-model_00-model_states.pt. +0: [2023-03-13 23:48:19,734] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_17-model_00-model_states.pt... +0: [2023-03-13 23:48:19,749] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_17-model_00-model_states.pt. +0: [2023-03-13 23:48:19,749] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/layer_19-model_00-model_states.pt... +0: [2023-03-13 23:48:19,750] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/layer_19-model_00-model_states.pt. +0: [2023-03-13 23:48:19,751] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step6000/mp_rank_00_model_states.pt +0: [2023-03-13 23:48:19,751] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/mp_rank_00_model_states.pt... +0: [2023-03-13 23:48:19,753] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/mp_rank_00_model_states.pt. +0: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... 
+0: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... 
+4: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... 
+6: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... 
+3: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... 
+2: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... 
+5: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:48:19,770] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:48:19,803] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:48:19,803] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2023-03-13 23:48:19,803] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! 
+1: [2023-03-13 23:48:19,803] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:48:19,803] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-13 23:48:19,804] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2023-03-13 23:48:19,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:48:19,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-13 23:48:19,804] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2023-03-13 23:48:19,805] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:48:19,805] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-13 23:48:19,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2023-03-13 23:48:19,805] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:48:19,805] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-13 23:48:19,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! 
+3: [2023-03-13 23:48:19,806] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:48:19,806] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-13 23:48:19,806] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2023-03-13 23:48:19,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:48:19,807] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-13 23:48:19,807] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2023-03-13 23:48:19,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:48:19,807] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-13 23:48:19,807] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2023-03-13 23:48:19,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:48:19,807] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-13 23:48:19,807] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! 
+2: [2023-03-13 23:48:19,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:48:19,807] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-13 23:48:19,807] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2023-03-13 23:48:19,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:48:19,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:48:19,808] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +3: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2023-03-13 23:48:19,808] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +3: [2023-03-13 23:48:19,808] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +4: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! 
+6: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:48:19,808] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2023-03-13 23:48:19,808] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +2: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:48:19,808] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +6: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:48:19,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:48:19,808] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +7: [2023-03-13 23:48:19,808] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +3: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2023-03-13 23:48:19,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-13 23:48:19,804] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2023-03-13 23:48:19,806] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:48:19,806] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-13 23:48:19,806] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:48:19,808] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:48:19,808] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:48:19,809] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-13 23:48:19,809] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2023-03-13 23:48:19,808] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:48:19,809] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2023-03-13 23:48:19,809] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2023-03-13 23:48:19,809] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. 
+6: [2023-03-13 23:48:19,809] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-13 23:48:19,809] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2023-03-13 23:48:19,810] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:48:19,810] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-13 23:48:19,810] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2023-03-13 23:48:19,810] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:48:19,810] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +2: [2023-03-13 23:48:19,810] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:48:19,810] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2023-03-13 23:48:19,810] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-13 23:48:19,810] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2023-03-13 23:48:19,810] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:48:19,810] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-13 23:48:19,810] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2023-03-13 23:48:19,810] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:48:19,810] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-13 23:48:19,810] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2023-03-13 23:48:19,810] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:48:19,810] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-13 23:48:19,810] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2023-03-13 23:48:19,811] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:48:19,811] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2023-03-13 23:48:19,811] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2023-03-13 23:48:19,811] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. 
+2: [2023-03-13 23:48:19,811] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +6: [2023-03-13 23:48:19,811] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2023-03-13 23:48:19,811] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +7: [2023-03-13 23:48:19,811] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2023-03-13 23:48:19,812] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. 
+3: [2023-03-13 23:48:19,812] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +1: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2023-03-13 23:48:19,812] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-13 23:48:19,812] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:48:19,812] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +1: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. 
+6: [2023-03-13 23:48:19,812] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +1: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:48:19,812] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +6: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:48:19,812] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2023-03-13 23:48:19,812] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-13 23:48:19,812] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-13 23:48:19,812] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-13 23:48:19,813] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! 
+1: [2023-03-13 23:48:19,813] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +1: [2023-03-13 23:48:19,813] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +3: [2023-03-13 23:48:19,814] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:48:19,814] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2023-03-13 23:48:19,814] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2023-03-13 23:48:19,815] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:48:19,815] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:48:19,815] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:48:19,815] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-13 23:48:19,815] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-13 23:48:19,815] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +6: [2023-03-13 23:48:19,815] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:48:19,815] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2023-03-13 23:48:19,815] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +7: [2023-03-13 23:48:19,815] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +6: [2023-03-13 23:48:19,815] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-13 23:48:19,815] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +2: [2023-03-13 23:48:19,817] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:48:19,817] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2023-03-13 23:48:19,817] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! 
+5: [2023-03-13 23:48:19,816] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:48:19,816] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-13 23:48:19,816] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2023-03-13 23:48:19,816] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:48:19,816] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-13 23:48:19,816] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2023-03-13 23:48:19,826] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:48:19,826] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:48:19,826] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:48:19,826] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. 
+0: [2023-03-13 23:48:19,826] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-13 23:48:19,826] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-13 23:48:19,826] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-13 23:48:19,826] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2023-03-13 23:48:19,826] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2023-03-13 23:48:19,826] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2023-03-13 23:48:19,835] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:48:19,835] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:48:19,835] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:48:19,835] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. 
+0: [2023-03-13 23:48:19,835] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-13 23:48:19,835] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-13 23:48:19,835] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-13 23:48:19,835] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-13 23:48:19,835] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2023-03-13 23:48:19,835] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2023-03-13 23:48:19,835] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2023-03-13 23:48:19,835] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +0: [2023-03-13 23:48:19,837] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-13 23:48:19,837] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2023-03-13 23:48:19,838] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:48:19,838] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-13 23:48:19,838] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2023-03-13 23:48:19,840] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:48:19,840] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-13 23:48:19,840] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +5: [2023-03-13 23:48:19,842] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:48:19,842] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-13 23:48:19,843] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! +4: [2023-03-13 23:48:19,844] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:48:19,845] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step6000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-13 23:48:19,845] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step6000 is ready now! 
+0: successfully saved checkpoint at iteration 6000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 413.82 +7: iteration 6010/ 21553 | consumed samples: 1538560 | consumed tokens: 3150970880 | elapsed time per iteration (s): 0.35 | learning rate: 1.692E-04 | global batch size: 256 | lm loss: 3.426365E+00 | grad norm: 0.403 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 737.368 | TFLOPs: 25.81 | +7: iteration 6020/ 21553 | consumed samples: 1541120 | consumed tokens: 3156213760 | elapsed time per iteration (s): 0.30 | learning rate: 1.691E-04 | global batch size: 256 | lm loss: 3.402069E+00 | grad norm: 0.479 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.118 | TFLOPs: 30.36 | +7: iteration 6030/ 21553 | consumed samples: 1543680 | consumed tokens: 3161456640 | elapsed time per iteration (s): 0.30 | learning rate: 1.690E-04 | global batch size: 256 | lm loss: 3.422090E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.955 | TFLOPs: 30.35 | +7: iteration 6040/ 21553 | consumed samples: 1546240 | consumed tokens: 3166699520 | elapsed time per iteration (s): 0.30 | learning rate: 1.689E-04 | global batch size: 256 | lm loss: 3.410770E+00 | grad norm: 0.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.812 | TFLOPs: 30.34 | +7: iteration 6050/ 21553 | consumed samples: 1548800 | consumed tokens: 3171942400 | elapsed time per iteration (s): 0.30 | learning rate: 1.688E-04 | global batch size: 256 | lm loss: 3.427681E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.442 | TFLOPs: 30.33 | +7: iteration 6060/ 21553 | consumed samples: 1551360 | consumed tokens: 3177185280 | elapsed time per iteration (s): 0.30 | learning rate: 
1.687E-04 | global batch size: 256 | lm loss: 3.411016E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.864 | TFLOPs: 30.35 | +7: iteration 6070/ 21553 | consumed samples: 1553920 | consumed tokens: 3182428160 | elapsed time per iteration (s): 0.30 | learning rate: 1.686E-04 | global batch size: 256 | lm loss: 3.398279E+00 | grad norm: 0.477 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.536 | TFLOPs: 30.34 | +7: iteration 6080/ 21553 | consumed samples: 1556480 | consumed tokens: 3187671040 | elapsed time per iteration (s): 0.30 | learning rate: 1.685E-04 | global batch size: 256 | lm loss: 3.413648E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.911 | TFLOPs: 30.31 | +7: iteration 6090/ 21553 | consumed samples: 1559040 | consumed tokens: 3192913920 | elapsed time per iteration (s): 0.30 | learning rate: 1.684E-04 | global batch size: 256 | lm loss: 3.407844E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.232 | TFLOPs: 30.32 | +7: iteration 6100/ 21553 | consumed samples: 1561600 | consumed tokens: 3198156800 | elapsed time per iteration (s): 0.30 | learning rate: 1.683E-04 | global batch size: 256 | lm loss: 3.396289E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.191 | TFLOPs: 30.32 | +7: iteration 6110/ 21553 | consumed samples: 1564160 | consumed tokens: 3203399680 | elapsed time per iteration (s): 0.30 | learning rate: 1.682E-04 | global batch size: 256 | lm loss: 3.410084E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.012 | TFLOPs: 30.14 | +7: iteration 6120/ 21553 | consumed samples: 1566720 | 
consumed tokens: 3208642560 | elapsed time per iteration (s): 0.30 | learning rate: 1.681E-04 | global batch size: 256 | lm loss: 3.402946E+00 | grad norm: 0.389 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.221 | TFLOPs: 30.36 | +7: iteration 6130/ 21553 | consumed samples: 1569280 | consumed tokens: 3213885440 | elapsed time per iteration (s): 0.30 | learning rate: 1.680E-04 | global batch size: 256 | lm loss: 3.408965E+00 | grad norm: 0.401 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.287 | TFLOPs: 30.33 | +7: iteration 6140/ 21553 | consumed samples: 1571840 | consumed tokens: 3219128320 | elapsed time per iteration (s): 0.30 | learning rate: 1.679E-04 | global batch size: 256 | lm loss: 3.415231E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.001 | TFLOPs: 30.35 | +7: iteration 6150/ 21553 | consumed samples: 1574400 | consumed tokens: 3224371200 | elapsed time per iteration (s): 0.30 | learning rate: 1.678E-04 | global batch size: 256 | lm loss: 3.406755E+00 | grad norm: 0.398 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.438 | TFLOPs: 30.33 | +7: iteration 6160/ 21553 | consumed samples: 1576960 | consumed tokens: 3229614080 | elapsed time per iteration (s): 0.30 | learning rate: 1.677E-04 | global batch size: 256 | lm loss: 3.414660E+00 | grad norm: 0.482 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.501 | TFLOPs: 30.33 | +7: iteration 6170/ 21553 | consumed samples: 1579520 | consumed tokens: 3234856960 | elapsed time per iteration (s): 0.30 | learning rate: 1.676E-04 | global batch size: 256 | lm loss: 3.413341E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 
866.806 | TFLOPs: 30.34 | +7: iteration 6180/ 21553 | consumed samples: 1582080 | consumed tokens: 3240099840 | elapsed time per iteration (s): 0.30 | learning rate: 1.675E-04 | global batch size: 256 | lm loss: 3.415628E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.537 | TFLOPs: 30.34 | +7: iteration 6190/ 21553 | consumed samples: 1584640 | consumed tokens: 3245342720 | elapsed time per iteration (s): 0.30 | learning rate: 1.674E-04 | global batch size: 256 | lm loss: 3.420128E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.521 | TFLOPs: 30.33 | +7: iteration 6200/ 21553 | consumed samples: 1587200 | consumed tokens: 3250585600 | elapsed time per iteration (s): 0.30 | learning rate: 1.673E-04 | global batch size: 256 | lm loss: 3.408151E+00 | grad norm: 0.511 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.883 | TFLOPs: 30.35 | +7: iteration 6210/ 21553 | consumed samples: 1589760 | consumed tokens: 3255828480 | elapsed time per iteration (s): 0.30 | learning rate: 1.672E-04 | global batch size: 256 | lm loss: 3.397389E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.769 | TFLOPs: 30.34 | +7: iteration 6220/ 21553 | consumed samples: 1592320 | consumed tokens: 3261071360 | elapsed time per iteration (s): 0.30 | learning rate: 1.671E-04 | global batch size: 256 | lm loss: 3.406784E+00 | grad norm: 0.460 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.362 | TFLOPs: 30.33 | +7: iteration 6230/ 21553 | consumed samples: 1594880 | consumed tokens: 3266314240 | elapsed time per iteration (s): 0.30 | learning rate: 1.670E-04 | global batch size: 256 | lm loss: 3.422365E+00 | grad norm: 0.419 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.004 | TFLOPs: 29.97 | +7: iteration 6240/ 21553 | consumed samples: 1597440 | consumed tokens: 3271557120 | elapsed time per iteration (s): 0.30 | learning rate: 1.669E-04 | global batch size: 256 | lm loss: 3.404950E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.950 | TFLOPs: 30.35 | +7: iteration 6250/ 21553 | consumed samples: 1600000 | consumed tokens: 3276800000 | elapsed time per iteration (s): 0.30 | learning rate: 1.668E-04 | global batch size: 256 | lm loss: 3.402754E+00 | grad norm: 0.400 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.473 | TFLOPs: 30.33 | +7: iteration 6260/ 21553 | consumed samples: 1602560 | consumed tokens: 3282042880 | elapsed time per iteration (s): 0.30 | learning rate: 1.667E-04 | global batch size: 256 | lm loss: 3.401994E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.262 | TFLOPs: 30.33 | +7: iteration 6270/ 21553 | consumed samples: 1605120 | consumed tokens: 3287285760 | elapsed time per iteration (s): 0.30 | learning rate: 1.665E-04 | global batch size: 256 | lm loss: 3.404184E+00 | grad norm: 0.384 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.457 | TFLOPs: 30.33 | +7: iteration 6280/ 21553 | consumed samples: 1607680 | consumed tokens: 3292528640 | elapsed time per iteration (s): 0.30 | learning rate: 1.664E-04 | global batch size: 256 | lm loss: 3.397058E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.048 | TFLOPs: 30.32 | +7: iteration 6290/ 21553 | consumed samples: 1610240 | consumed tokens: 3297771520 | elapsed time per iteration (s): 0.30 | learning rate: 1.663E-04 | 
global batch size: 256 | lm loss: 3.402051E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.346 | TFLOPs: 30.33 | +7: iteration 6300/ 21553 | consumed samples: 1612800 | consumed tokens: 3303014400 | elapsed time per iteration (s): 0.30 | learning rate: 1.662E-04 | global batch size: 256 | lm loss: 3.401057E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.370 | TFLOPs: 30.33 | +7: iteration 6310/ 21553 | consumed samples: 1615360 | consumed tokens: 3308257280 | elapsed time per iteration (s): 0.30 | learning rate: 1.661E-04 | global batch size: 256 | lm loss: 3.389648E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.313 | TFLOPs: 30.33 | +7: iteration 6320/ 21553 | consumed samples: 1617920 | consumed tokens: 3313500160 | elapsed time per iteration (s): 0.30 | learning rate: 1.660E-04 | global batch size: 256 | lm loss: 3.393035E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.900 | TFLOPs: 30.35 | +7: iteration 6330/ 21553 | consumed samples: 1620480 | consumed tokens: 3318743040 | elapsed time per iteration (s): 0.30 | learning rate: 1.659E-04 | global batch size: 256 | lm loss: 3.402215E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.088 | TFLOPs: 30.35 | +7: iteration 6340/ 21553 | consumed samples: 1623040 | consumed tokens: 3323985920 | elapsed time per iteration (s): 0.30 | learning rate: 1.658E-04 | global batch size: 256 | lm loss: 3.414987E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.285 | TFLOPs: 30.36 | +7: iteration 6350/ 21553 | consumed samples: 1625600 | consumed 
tokens: 3329228800 | elapsed time per iteration (s): 0.30 | learning rate: 1.657E-04 | global batch size: 256 | lm loss: 3.394749E+00 | grad norm: 0.403 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.493 | TFLOPs: 30.33 | +7: iteration 6360/ 21553 | consumed samples: 1628160 | consumed tokens: 3334471680 | elapsed time per iteration (s): 0.30 | learning rate: 1.656E-04 | global batch size: 256 | lm loss: 3.391602E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.159 | TFLOPs: 30.32 | +7: iteration 6370/ 21553 | consumed samples: 1630720 | consumed tokens: 3339714560 | elapsed time per iteration (s): 0.30 | learning rate: 1.655E-04 | global batch size: 256 | lm loss: 3.402840E+00 | grad norm: 0.406 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.263 | TFLOPs: 30.08 | +7: iteration 6380/ 21553 | consumed samples: 1633280 | consumed tokens: 3344957440 | elapsed time per iteration (s): 0.30 | learning rate: 1.654E-04 | global batch size: 256 | lm loss: 3.392649E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.449 | TFLOPs: 30.33 | +7: iteration 6390/ 21553 | consumed samples: 1635840 | consumed tokens: 3350200320 | elapsed time per iteration (s): 0.30 | learning rate: 1.653E-04 | global batch size: 256 | lm loss: 3.395212E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.254 | TFLOPs: 30.33 | +7: iteration 6400/ 21553 | consumed samples: 1638400 | consumed tokens: 3355443200 | elapsed time per iteration (s): 0.30 | learning rate: 1.652E-04 | global batch size: 256 | lm loss: 3.393875E+00 | grad norm: 0.389 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.401 
| TFLOPs: 30.33 | +7: iteration 6410/ 21553 | consumed samples: 1640960 | consumed tokens: 3360686080 | elapsed time per iteration (s): 0.30 | learning rate: 1.651E-04 | global batch size: 256 | lm loss: 3.391862E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.702 | TFLOPs: 30.34 | +7: iteration 6420/ 21553 | consumed samples: 1643520 | consumed tokens: 3365928960 | elapsed time per iteration (s): 0.30 | learning rate: 1.650E-04 | global batch size: 256 | lm loss: 3.381042E+00 | grad norm: 0.396 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.476 | TFLOPs: 30.33 | +7: iteration 6430/ 21553 | consumed samples: 1646080 | consumed tokens: 3371171840 | elapsed time per iteration (s): 0.30 | learning rate: 1.649E-04 | global batch size: 256 | lm loss: 3.389692E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.304 | TFLOPs: 30.33 | +7: iteration 6440/ 21553 | consumed samples: 1648640 | consumed tokens: 3376414720 | elapsed time per iteration (s): 0.30 | learning rate: 1.648E-04 | global batch size: 256 | lm loss: 3.390417E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.864 | TFLOPs: 30.31 | +7: iteration 6450/ 21553 | consumed samples: 1651200 | consumed tokens: 3381657600 | elapsed time per iteration (s): 0.30 | learning rate: 1.647E-04 | global batch size: 256 | lm loss: 3.390864E+00 | grad norm: 0.454 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.299 | TFLOPs: 30.29 | +7: iteration 6460/ 21553 | consumed samples: 1653760 | consumed tokens: 3386900480 | elapsed time per iteration (s): 0.30 | learning rate: 1.646E-04 | global batch size: 256 | lm loss: 3.396147E+00 | grad norm: 0.446 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.800 | TFLOPs: 30.31 | +7: iteration 6470/ 21553 | consumed samples: 1656320 | consumed tokens: 3392143360 | elapsed time per iteration (s): 0.30 | learning rate: 1.645E-04 | global batch size: 256 | lm loss: 3.381123E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.488 | TFLOPs: 30.30 | +7: iteration 6480/ 21553 | consumed samples: 1658880 | consumed tokens: 3397386240 | elapsed time per iteration (s): 0.30 | learning rate: 1.644E-04 | global batch size: 256 | lm loss: 3.392856E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.496 | TFLOPs: 30.30 | +7: iteration 6490/ 21553 | consumed samples: 1661440 | consumed tokens: 3402629120 | elapsed time per iteration (s): 0.30 | learning rate: 1.643E-04 | global batch size: 256 | lm loss: 3.380826E+00 | grad norm: 0.470 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.800 | TFLOPs: 30.31 | +7: iteration 6500/ 21553 | consumed samples: 1664000 | consumed tokens: 3407872000 | elapsed time per iteration (s): 0.30 | learning rate: 1.641E-04 | global batch size: 256 | lm loss: 3.370744E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.709 | TFLOPs: 30.31 | +7: iteration 6510/ 21553 | consumed samples: 1666560 | consumed tokens: 3413114880 | elapsed time per iteration (s): 0.30 | learning rate: 1.640E-04 | global batch size: 256 | lm loss: 3.371986E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.875 | TFLOPs: 30.31 | +7: iteration 6520/ 21553 | consumed samples: 1669120 | consumed tokens: 3418357760 | elapsed time per iteration (s): 0.30 | learning rate: 1.639E-04 | global batch 
size: 256 | lm loss: 3.388424E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.565 | TFLOPs: 30.30 | +7: iteration 6530/ 21553 | consumed samples: 1671680 | consumed tokens: 3423600640 | elapsed time per iteration (s): 0.30 | learning rate: 1.638E-04 | global batch size: 256 | lm loss: 3.393537E+00 | grad norm: 0.393 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.717 | TFLOPs: 30.31 | +7: iteration 6540/ 21553 | consumed samples: 1674240 | consumed tokens: 3428843520 | elapsed time per iteration (s): 0.30 | learning rate: 1.637E-04 | global batch size: 256 | lm loss: 3.383491E+00 | grad norm: 0.398 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.699 | TFLOPs: 30.03 | +7: iteration 6550/ 21553 | consumed samples: 1676800 | consumed tokens: 3434086400 | elapsed time per iteration (s): 0.30 | learning rate: 1.636E-04 | global batch size: 256 | lm loss: 3.379226E+00 | grad norm: 0.409 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.584 | TFLOPs: 30.34 | +7: iteration 6560/ 21553 | consumed samples: 1679360 | consumed tokens: 3439329280 | elapsed time per iteration (s): 0.30 | learning rate: 1.635E-04 | global batch size: 256 | lm loss: 3.366959E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.361 | TFLOPs: 30.29 | +7: iteration 6570/ 21553 | consumed samples: 1681920 | consumed tokens: 3444572160 | elapsed time per iteration (s): 0.30 | learning rate: 1.634E-04 | global batch size: 256 | lm loss: 3.378254E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.866 | TFLOPs: 30.31 | +7: iteration 6580/ 21553 | consumed samples: 1684480 | consumed tokens: 
3449815040 | elapsed time per iteration (s): 0.30 | learning rate: 1.633E-04 | global batch size: 256 | lm loss: 3.388710E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.373 | TFLOPs: 30.33 | +7: iteration 6590/ 21553 | consumed samples: 1687040 | consumed tokens: 3455057920 | elapsed time per iteration (s): 0.30 | learning rate: 1.632E-04 | global batch size: 256 | lm loss: 3.374388E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.731 | TFLOPs: 30.31 | +7: iteration 6600/ 21553 | consumed samples: 1689600 | consumed tokens: 3460300800 | elapsed time per iteration (s): 0.30 | learning rate: 1.631E-04 | global batch size: 256 | lm loss: 3.385966E+00 | grad norm: 0.454 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.041 | TFLOPs: 30.32 | +7: iteration 6610/ 21553 | consumed samples: 1692160 | consumed tokens: 3465543680 | elapsed time per iteration (s): 0.30 | learning rate: 1.630E-04 | global batch size: 256 | lm loss: 3.385135E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.475 | TFLOPs: 30.30 | +7: iteration 6620/ 21553 | consumed samples: 1694720 | consumed tokens: 3470786560 | elapsed time per iteration (s): 0.30 | learning rate: 1.629E-04 | global batch size: 256 | lm loss: 3.391126E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.657 | TFLOPs: 30.20 | +7: iteration 6630/ 21553 | consumed samples: 1697280 | consumed tokens: 3476029440 | elapsed time per iteration (s): 0.30 | learning rate: 1.628E-04 | global batch size: 256 | lm loss: 3.385999E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.010 | 
TFLOPs: 30.25 | +7: iteration 6640/ 21553 | consumed samples: 1699840 | consumed tokens: 3481272320 | elapsed time per iteration (s): 0.30 | learning rate: 1.627E-04 | global batch size: 256 | lm loss: 3.372669E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.498 | TFLOPs: 30.30 | +7: iteration 6650/ 21553 | consumed samples: 1702400 | consumed tokens: 3486515200 | elapsed time per iteration (s): 0.30 | learning rate: 1.625E-04 | global batch size: 256 | lm loss: 3.367501E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.724 | TFLOPs: 30.27 | +7: iteration 6660/ 21553 | consumed samples: 1704960 | consumed tokens: 3491758080 | elapsed time per iteration (s): 0.30 | learning rate: 1.624E-04 | global batch size: 256 | lm loss: 3.363454E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.456 | TFLOPs: 30.26 | +7: iteration 6670/ 21553 | consumed samples: 1707520 | consumed tokens: 3497000960 | elapsed time per iteration (s): 0.30 | learning rate: 1.623E-04 | global batch size: 256 | lm loss: 3.366405E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.416 | TFLOPs: 30.33 | +7: iteration 6680/ 21553 | consumed samples: 1710080 | consumed tokens: 3502243840 | elapsed time per iteration (s): 0.30 | learning rate: 1.622E-04 | global batch size: 256 | lm loss: 3.369009E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.902 | TFLOPs: 30.31 | +7: iteration 6690/ 21553 | consumed samples: 1712640 | consumed tokens: 3507486720 | elapsed time per iteration (s): 0.30 | learning rate: 1.621E-04 | global batch size: 256 | lm loss: 3.372279E+00 | grad norm: 0.376 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.365 | TFLOPs: 30.19 | +7: iteration 6700/ 21553 | consumed samples: 1715200 | consumed tokens: 3512729600 | elapsed time per iteration (s): 0.30 | learning rate: 1.620E-04 | global batch size: 256 | lm loss: 3.369227E+00 | grad norm: 0.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.663 | TFLOPs: 30.27 | +7: iteration 6710/ 21553 | consumed samples: 1717760 | consumed tokens: 3517972480 | elapsed time per iteration (s): 0.30 | learning rate: 1.619E-04 | global batch size: 256 | lm loss: 3.365274E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.658 | TFLOPs: 30.27 | +7: iteration 6720/ 21553 | consumed samples: 1720320 | consumed tokens: 3523215360 | elapsed time per iteration (s): 0.30 | learning rate: 1.618E-04 | global batch size: 256 | lm loss: 3.365992E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.953 | TFLOPs: 30.31 | +7: iteration 6730/ 21553 | consumed samples: 1722880 | consumed tokens: 3528458240 | elapsed time per iteration (s): 0.30 | learning rate: 1.617E-04 | global batch size: 256 | lm loss: 3.364433E+00 | grad norm: 0.471 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.888 | TFLOPs: 30.35 | +7: iteration 6740/ 21553 | consumed samples: 1725440 | consumed tokens: 3533701120 | elapsed time per iteration (s): 0.30 | learning rate: 1.616E-04 | global batch size: 256 | lm loss: 3.369408E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.932 | TFLOPs: 30.35 | +7: iteration 6750/ 21553 | consumed samples: 1728000 | consumed tokens: 3538944000 | elapsed time per iteration (s): 0.30 | learning rate: 1.615E-04 | global batch 
size: 256 | lm loss: 3.366309E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.817 | TFLOPs: 30.34 | +7: iteration 6760/ 21553 | consumed samples: 1730560 | consumed tokens: 3544186880 | elapsed time per iteration (s): 0.30 | learning rate: 1.614E-04 | global batch size: 256 | lm loss: 3.352961E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.153 | TFLOPs: 30.36 | +7: iteration 6770/ 21553 | consumed samples: 1733120 | consumed tokens: 3549429760 | elapsed time per iteration (s): 0.30 | learning rate: 1.612E-04 | global batch size: 256 | lm loss: 3.365722E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.796 | TFLOPs: 30.34 | +7: iteration 6780/ 21553 | consumed samples: 1735680 | consumed tokens: 3554672640 | elapsed time per iteration (s): 0.30 | learning rate: 1.611E-04 | global batch size: 256 | lm loss: 3.353357E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.614 | TFLOPs: 30.34 | +7: iteration 6790/ 21553 | consumed samples: 1738240 | consumed tokens: 3559915520 | elapsed time per iteration (s): 0.30 | learning rate: 1.610E-04 | global batch size: 256 | lm loss: 3.369699E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.684 | TFLOPs: 30.34 | +7: iteration 6800/ 21553 | consumed samples: 1740800 | consumed tokens: 3565158400 | elapsed time per iteration (s): 0.30 | learning rate: 1.609E-04 | global batch size: 256 | lm loss: 3.367351E+00 | grad norm: 0.459 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.171 | TFLOPs: 30.32 | +7: iteration 6810/ 21553 | consumed samples: 1743360 | consumed tokens: 
3570401280 | elapsed time per iteration (s): 0.30 | learning rate: 1.608E-04 | global batch size: 256 | lm loss: 3.359808E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.344 | TFLOPs: 30.33 | +7: iteration 6820/ 21553 | consumed samples: 1745920 | consumed tokens: 3575644160 | elapsed time per iteration (s): 0.30 | learning rate: 1.607E-04 | global batch size: 256 | lm loss: 3.366811E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.863 | TFLOPs: 30.31 | +7: iteration 6830/ 21553 | consumed samples: 1748480 | consumed tokens: 3580887040 | elapsed time per iteration (s): 0.30 | learning rate: 1.606E-04 | global batch size: 256 | lm loss: 3.360229E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.239 | TFLOPs: 30.32 | +7: iteration 6840/ 21553 | consumed samples: 1751040 | consumed tokens: 3586129920 | elapsed time per iteration (s): 0.30 | learning rate: 1.605E-04 | global batch size: 256 | lm loss: 3.355415E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.642 | TFLOPs: 30.34 | +7: iteration 6850/ 21553 | consumed samples: 1753600 | consumed tokens: 3591372800 | elapsed time per iteration (s): 0.30 | learning rate: 1.604E-04 | global batch size: 256 | lm loss: 3.361308E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.783 | TFLOPs: 30.34 | +7: iteration 6860/ 21553 | consumed samples: 1756160 | consumed tokens: 3596615680 | elapsed time per iteration (s): 0.30 | learning rate: 1.603E-04 | global batch size: 256 | lm loss: 3.368105E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.503 | 
TFLOPs: 30.33 | +7: iteration 6870/ 21553 | consumed samples: 1758720 | consumed tokens: 3601858560 | elapsed time per iteration (s): 0.30 | learning rate: 1.601E-04 | global batch size: 256 | lm loss: 3.358687E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.956 | TFLOPs: 30.35 | +7: iteration 6880/ 21553 | consumed samples: 1761280 | consumed tokens: 3607101440 | elapsed time per iteration (s): 0.30 | learning rate: 1.600E-04 | global batch size: 256 | lm loss: 3.364072E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.849 | TFLOPs: 30.35 | +7: iteration 6890/ 21553 | consumed samples: 1763840 | consumed tokens: 3612344320 | elapsed time per iteration (s): 0.30 | learning rate: 1.599E-04 | global batch size: 256 | lm loss: 3.362907E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.407 | TFLOPs: 30.33 | +7: iteration 6900/ 21553 | consumed samples: 1766400 | consumed tokens: 3617587200 | elapsed time per iteration (s): 0.30 | learning rate: 1.598E-04 | global batch size: 256 | lm loss: 3.353676E+00 | grad norm: 0.482 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.362 | TFLOPs: 30.33 | +7: iteration 6910/ 21553 | consumed samples: 1768960 | consumed tokens: 3622830080 | elapsed time per iteration (s): 0.30 | learning rate: 1.597E-04 | global batch size: 256 | lm loss: 3.357927E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.802 | TFLOPs: 30.34 | +7: iteration 6920/ 21553 | consumed samples: 1771520 | consumed tokens: 3628072960 | elapsed time per iteration (s): 0.30 | learning rate: 1.596E-04 | global batch size: 256 | lm loss: 3.357727E+00 | grad norm: 0.402 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.260 | TFLOPs: 30.33 | +7: iteration 6930/ 21553 | consumed samples: 1774080 | consumed tokens: 3633315840 | elapsed time per iteration (s): 0.30 | learning rate: 1.595E-04 | global batch size: 256 | lm loss: 3.351159E+00 | grad norm: 0.405 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.708 | TFLOPs: 30.34 | +7: iteration 6940/ 21553 | consumed samples: 1776640 | consumed tokens: 3638558720 | elapsed time per iteration (s): 0.30 | learning rate: 1.594E-04 | global batch size: 256 | lm loss: 3.352117E+00 | grad norm: 0.409 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.545 | TFLOPs: 30.34 | +7: iteration 6950/ 21553 | consumed samples: 1779200 | consumed tokens: 3643801600 | elapsed time per iteration (s): 0.30 | learning rate: 1.593E-04 | global batch size: 256 | lm loss: 3.343069E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.881 | TFLOPs: 30.35 | +7: iteration 6960/ 21553 | consumed samples: 1781760 | consumed tokens: 3649044480 | elapsed time per iteration (s): 0.30 | learning rate: 1.592E-04 | global batch size: 256 | lm loss: 3.356991E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.758 | TFLOPs: 30.34 | +7: iteration 6970/ 21553 | consumed samples: 1784320 | consumed tokens: 3654287360 | elapsed time per iteration (s): 0.30 | learning rate: 1.590E-04 | global batch size: 256 | lm loss: 3.347640E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.187 | TFLOPs: 30.32 | +7: iteration 6980/ 21553 | consumed samples: 1786880 | consumed tokens: 3659530240 | elapsed time per iteration (s): 0.30 | learning rate: 1.589E-04 | global batch 
size: 256 | lm loss: 3.355361E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.923 | TFLOPs: 30.35 | +7: iteration 6990/ 21553 | consumed samples: 1789440 | consumed tokens: 3664773120 | elapsed time per iteration (s): 0.30 | learning rate: 1.588E-04 | global batch size: 256 | lm loss: 3.352282E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.080 | TFLOPs: 30.35 | +7: iteration 7000/ 21553 | consumed samples: 1792000 | consumed tokens: 3670016000 | elapsed time per iteration (s): 0.30 | learning rate: 1.587E-04 | global batch size: 256 | lm loss: 3.361261E+00 | grad norm: 0.397 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.186 | TFLOPs: 30.32 | +7: ----------------------------------------------------------------------------------------------- +7: validation loss at iteration 7000 | lm loss value: 3.775902E+00 | lm loss PPL: 4.363686E+01 | +7: ----------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 7000 to checkpoints_146m14b100m +0: [2023-03-13 23:53:15,581] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step7000 is begin to save! +0: [2023-03-13 23:53:15,584] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_01-model_00-model_states.pt... +0: [2023-03-13 23:53:15,669] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_01-model_00-model_states.pt. +0: [2023-03-13 23:53:15,670] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_03-model_00-model_states.pt... 
+0: [2023-03-13 23:53:15,685] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_03-model_00-model_states.pt. +0: [2023-03-13 23:53:15,685] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_04-model_00-model_states.pt... +0: [2023-03-13 23:53:15,700] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_04-model_00-model_states.pt. +0: [2023-03-13 23:53:15,700] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_05-model_00-model_states.pt... +0: [2023-03-13 23:53:15,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_05-model_00-model_states.pt. +0: [2023-03-13 23:53:15,715] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_06-model_00-model_states.pt... +0: [2023-03-13 23:53:15,729] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_06-model_00-model_states.pt. +0: [2023-03-13 23:53:15,729] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_07-model_00-model_states.pt... +0: [2023-03-13 23:53:15,744] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_07-model_00-model_states.pt. +0: [2023-03-13 23:53:15,744] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_08-model_00-model_states.pt... +0: [2023-03-13 23:53:15,759] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_08-model_00-model_states.pt. +0: [2023-03-13 23:53:15,759] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_09-model_00-model_states.pt... 
+0: [2023-03-13 23:53:15,774] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_09-model_00-model_states.pt. +0: [2023-03-13 23:53:15,774] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_10-model_00-model_states.pt... +0: [2023-03-13 23:53:15,789] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_10-model_00-model_states.pt. +0: [2023-03-13 23:53:15,789] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_11-model_00-model_states.pt... +0: [2023-03-13 23:53:15,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_11-model_00-model_states.pt. +0: [2023-03-13 23:53:15,804] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_12-model_00-model_states.pt... +0: [2023-03-13 23:53:15,819] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_12-model_00-model_states.pt. +0: [2023-03-13 23:53:15,819] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_13-model_00-model_states.pt... +0: [2023-03-13 23:53:15,834] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_13-model_00-model_states.pt. +0: [2023-03-13 23:53:15,834] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_14-model_00-model_states.pt... +0: [2023-03-13 23:53:15,849] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_14-model_00-model_states.pt. +0: [2023-03-13 23:53:15,849] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_15-model_00-model_states.pt... 
+0: [2023-03-13 23:53:15,864] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_15-model_00-model_states.pt. +0: [2023-03-13 23:53:15,864] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_16-model_00-model_states.pt... +0: [2023-03-13 23:53:15,878] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_16-model_00-model_states.pt. +0: [2023-03-13 23:53:15,879] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_17-model_00-model_states.pt... +0: [2023-03-13 23:53:15,893] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_17-model_00-model_states.pt. +0: [2023-03-13 23:53:15,894] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/layer_19-model_00-model_states.pt... +0: [2023-03-13 23:53:15,895] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/layer_19-model_00-model_states.pt. +0: [2023-03-13 23:53:15,895] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step7000/mp_rank_00_model_states.pt +0: [2023-03-13 23:53:15,895] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/mp_rank_00_model_states.pt... +0: [2023-03-13 23:53:15,897] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/mp_rank_00_model_states.pt. +0: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... 
+0: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... 
+4: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... 
+6: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... 
+3: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... 
+4: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... 
+3: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:53:15,913] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:53:15,914] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:53:15,946] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:53:15,946] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-13 23:53:15,946] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2023-03-13 23:53:15,946] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:53:15,948] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:53:15,948] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:53:15,948] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-13 23:53:15,948] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +7: [2023-03-13 23:53:15,948] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-13 23:53:15,948] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2023-03-13 23:53:15,949] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:53:15,949] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-13 23:53:15,949] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2023-03-13 23:53:15,949] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:53:15,950] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-13 23:53:15,950] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! 
+2: [2023-03-13 23:53:15,949] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:53:15,948] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:53:15,949] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3: [2023-03-13 23:53:15,948] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +7: [2023-03-13 23:53:15,950] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:53:15,949] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2023-03-13 23:53:15,948] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2023-03-13 23:53:15,950] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:53:15,950] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:53:15,950] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. 
+3: [2023-03-13 23:53:15,950] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +7: [2023-03-13 23:53:15,950] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +0: [2023-03-13 23:53:15,950] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3: [2023-03-13 23:53:15,950] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2023-03-13 23:53:15,950] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +7: [2023-03-13 23:53:15,950] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2023-03-13 23:53:15,950] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2023-03-13 23:53:15,950] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2023-03-13 23:53:15,950] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:53:15,950] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-13 23:53:15,950] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2023-03-13 23:53:15,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:53:15,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:53:15,951] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-13 23:53:15,951] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2023-03-13 23:53:15,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:53:15,951] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-13 23:53:15,951] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2023-03-13 23:53:15,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:53:15,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:53:15,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:53:15,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. 
+3: [2023-03-13 23:53:15,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-13 23:53:15,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-13 23:53:15,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +6: [2023-03-13 23:53:15,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +3: [2023-03-13 23:53:15,952] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2023-03-13 23:53:15,952] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2023-03-13 23:53:15,952] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2023-03-13 23:53:15,952] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2023-03-13 23:53:15,952] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:53:15,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-13 23:53:15,952] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! 
+7: [2023-03-13 23:53:15,951] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-13 23:53:15,951] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2023-03-13 23:53:15,952] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:53:15,952] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:53:15,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3: [2023-03-13 23:53:15,952] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +2: [2023-03-13 23:53:15,952] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2023-03-13 23:53:15,952] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2023-03-13 23:53:15,952] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:53:15,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. 
+6: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:53:15,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +6: [2023-03-13 23:53:15,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +5: [2023-03-13 23:53:15,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +7: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:53:15,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. 
+2: [2023-03-13 23:53:15,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +7: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:53:15,953] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2023-03-13 23:53:15,954] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:53:15,954] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-13 23:53:15,954] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2023-03-13 23:53:15,954] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:53:15,954] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:53:15,954] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-13 23:53:15,954] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:53:15,954] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-13 23:53:15,955] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2023-03-13 23:53:15,955] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-13 23:53:15,955] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2023-03-13 23:53:15,955] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +2: [2023-03-13 23:53:15,953] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2023-03-13 23:53:15,954] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:53:15,954] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2023-03-13 23:53:15,954] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2023-03-13 23:53:15,955] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:53:15,954] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. 
+2: [2023-03-13 23:53:15,954] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2023-03-13 23:53:15,954] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2023-03-13 23:53:15,955] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-13 23:53:15,955] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:53:15,956] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +2: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:53:15,956] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-13 23:53:15,956] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +2: [2023-03-13 23:53:15,956] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +4: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2023-03-13 23:53:15,956] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +2: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2023-03-13 23:53:15,956] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +0: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! 
+2: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:53:15,956] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +3: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:53:15,956] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-13 23:53:15,956] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2023-03-13 23:53:15,957] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:53:15,957] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:53:15,957] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-13 23:53:15,957] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-13 23:53:15,957] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +6: [2023-03-13 23:53:15,957] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! 
+0: [2023-03-13 23:53:15,957] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:53:15,958] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-13 23:53:15,958] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2023-03-13 23:53:15,960] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:53:15,960] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-13 23:53:15,960] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2023-03-13 23:53:15,959] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:53:15,960] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-13 23:53:15,960] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2023-03-13 23:53:15,960] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:53:15,960] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-13 23:53:15,960] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:53:15,960] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2023-03-13 23:53:15,960] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-13 23:53:15,960] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +4: [2023-03-13 23:53:15,965] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:53:15,965] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-13 23:53:15,965] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2023-03-13 23:53:15,974] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:53:15,974] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-13 23:53:15,974] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! 
+0: [2023-03-13 23:53:15,977] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-13 23:53:15,977] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2023-03-13 23:53:15,977] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:53:15,977] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-13 23:53:15,977] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +7: [2023-03-13 23:53:15,977] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:53:15,977] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-13 23:53:15,977] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2023-03-13 23:53:15,979] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:53:15,979] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-13 23:53:15,979] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2023-03-13 23:53:15,981] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:53:15,981] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-13 23:53:15,981] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2023-03-13 23:53:15,983] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:53:15,983] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-13 23:53:15,983] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2023-03-13 23:53:15,985] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:53:15,986] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-13 23:53:15,986] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +5: [2023-03-13 23:53:15,986] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:53:15,986] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-13 23:53:15,986] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2023-03-13 23:53:15,987] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. 
+1: [2023-03-13 23:53:15,987] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-13 23:53:15,987] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2023-03-13 23:53:15,990] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:53:15,991] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-13 23:53:15,991] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! +1: [2023-03-13 23:53:15,991] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:53:15,992] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step7000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-13 23:53:15,992] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step7000 is ready now! 
+0: successfully saved checkpoint at iteration 7000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 414.68 +7: iteration 7010/ 21553 | consumed samples: 1794560 | consumed tokens: 3675258880 | elapsed time per iteration (s): 0.35 | learning rate: 1.586E-04 | global batch size: 256 | lm loss: 3.349208E+00 | grad norm: 0.405 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 740.387 | TFLOPs: 25.92 | +7: iteration 7020/ 21553 | consumed samples: 1797120 | consumed tokens: 3680501760 | elapsed time per iteration (s): 0.30 | learning rate: 1.585E-04 | global batch size: 256 | lm loss: 3.364438E+00 | grad norm: 0.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.747 | TFLOPs: 30.34 | +7: iteration 7030/ 21553 | consumed samples: 1799680 | consumed tokens: 3685744640 | elapsed time per iteration (s): 0.30 | learning rate: 1.584E-04 | global batch size: 256 | lm loss: 3.345462E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.548 | TFLOPs: 30.34 | +7: iteration 7040/ 21553 | consumed samples: 1802240 | consumed tokens: 3690987520 | elapsed time per iteration (s): 0.30 | learning rate: 1.583E-04 | global batch size: 256 | lm loss: 3.345187E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.811 | TFLOPs: 30.17 | +7: iteration 7050/ 21553 | consumed samples: 1804800 | consumed tokens: 3696230400 | elapsed time per iteration (s): 0.30 | learning rate: 1.582E-04 | global batch size: 256 | lm loss: 3.365187E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.820 | TFLOPs: 30.34 | +7: iteration 7060/ 21553 | consumed samples: 1807360 | consumed tokens: 3701473280 | elapsed time per iteration (s): 0.30 | learning rate: 
1.580E-04 | global batch size: 256 | lm loss: 3.339491E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.808 | TFLOPs: 30.31 | +7: iteration 7070/ 21553 | consumed samples: 1809920 | consumed tokens: 3706716160 | elapsed time per iteration (s): 0.30 | learning rate: 1.579E-04 | global batch size: 256 | lm loss: 3.359930E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.937 | TFLOPs: 30.35 | +7: iteration 7080/ 21553 | consumed samples: 1812480 | consumed tokens: 3711959040 | elapsed time per iteration (s): 0.30 | learning rate: 1.578E-04 | global batch size: 256 | lm loss: 3.362073E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.404 | TFLOPs: 30.33 | +7: iteration 7090/ 21553 | consumed samples: 1815040 | consumed tokens: 3717201920 | elapsed time per iteration (s): 0.30 | learning rate: 1.577E-04 | global batch size: 256 | lm loss: 3.351828E+00 | grad norm: 0.400 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.028 | TFLOPs: 30.35 | +7: iteration 7100/ 21553 | consumed samples: 1817600 | consumed tokens: 3722444800 | elapsed time per iteration (s): 0.30 | learning rate: 1.576E-04 | global batch size: 256 | lm loss: 3.358841E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.699 | TFLOPs: 30.34 | +7: iteration 7110/ 21553 | consumed samples: 1820160 | consumed tokens: 3727687680 | elapsed time per iteration (s): 0.30 | learning rate: 1.575E-04 | global batch size: 256 | lm loss: 3.352505E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.201 | TFLOPs: 30.36 | +7: iteration 7120/ 21553 | consumed samples: 1822720 | 
consumed tokens: 3732930560 | elapsed time per iteration (s): 0.30 | learning rate: 1.574E-04 | global batch size: 256 | lm loss: 3.354124E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.564 | TFLOPs: 30.34 | +7: iteration 7130/ 21553 | consumed samples: 1825280 | consumed tokens: 3738173440 | elapsed time per iteration (s): 0.30 | learning rate: 1.573E-04 | global batch size: 256 | lm loss: 3.338544E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.418 | TFLOPs: 30.33 | +7: iteration 7140/ 21553 | consumed samples: 1827840 | consumed tokens: 3743416320 | elapsed time per iteration (s): 0.30 | learning rate: 1.571E-04 | global batch size: 256 | lm loss: 3.354692E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.114 | TFLOPs: 30.32 | +7: iteration 7150/ 21553 | consumed samples: 1830400 | consumed tokens: 3748659200 | elapsed time per iteration (s): 0.30 | learning rate: 1.570E-04 | global batch size: 256 | lm loss: 3.337197E+00 | grad norm: 0.397 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.360 | TFLOPs: 30.33 | +7: iteration 7160/ 21553 | consumed samples: 1832960 | consumed tokens: 3753902080 | elapsed time per iteration (s): 0.30 | learning rate: 1.569E-04 | global batch size: 256 | lm loss: 3.339939E+00 | grad norm: 0.390 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.235 | TFLOPs: 30.32 | +7: iteration 7170/ 21553 | consumed samples: 1835520 | consumed tokens: 3759144960 | elapsed time per iteration (s): 0.30 | learning rate: 1.568E-04 | global batch size: 256 | lm loss: 3.323862E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 
865.476 | TFLOPs: 30.30 | +7: iteration 7180/ 21553 | consumed samples: 1838080 | consumed tokens: 3764387840 | elapsed time per iteration (s): 0.30 | learning rate: 1.567E-04 | global batch size: 256 | lm loss: 3.351432E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.788 | TFLOPs: 30.31 | +7: iteration 7190/ 21553 | consumed samples: 1840640 | consumed tokens: 3769630720 | elapsed time per iteration (s): 0.30 | learning rate: 1.566E-04 | global batch size: 256 | lm loss: 3.336378E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.369 | TFLOPs: 30.33 | +7: iteration 7200/ 21553 | consumed samples: 1843200 | consumed tokens: 3774873600 | elapsed time per iteration (s): 0.30 | learning rate: 1.565E-04 | global batch size: 256 | lm loss: 3.345731E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.090 | TFLOPs: 30.32 | +7: iteration 7210/ 21553 | consumed samples: 1845760 | consumed tokens: 3780116480 | elapsed time per iteration (s): 0.30 | learning rate: 1.563E-04 | global batch size: 256 | lm loss: 3.341478E+00 | grad norm: 0.472 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.971 | TFLOPs: 30.32 | +7: iteration 7220/ 21553 | consumed samples: 1848320 | consumed tokens: 3785359360 | elapsed time per iteration (s): 0.30 | learning rate: 1.562E-04 | global batch size: 256 | lm loss: 3.332643E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.058 | TFLOPs: 30.32 | +7: iteration 7230/ 21553 | consumed samples: 1850880 | consumed tokens: 3790602240 | elapsed time per iteration (s): 0.30 | learning rate: 1.561E-04 | global batch size: 256 | lm loss: 3.337212E+00 | grad norm: 0.437 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.401 | TFLOPs: 30.33 | +7: iteration 7240/ 21553 | consumed samples: 1853440 | consumed tokens: 3795845120 | elapsed time per iteration (s): 0.30 | learning rate: 1.560E-04 | global batch size: 256 | lm loss: 3.330625E+00 | grad norm: 0.400 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.232 | TFLOPs: 30.32 | +7: iteration 7250/ 21553 | consumed samples: 1856000 | consumed tokens: 3801088000 | elapsed time per iteration (s): 0.30 | learning rate: 1.559E-04 | global batch size: 256 | lm loss: 3.329698E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.130 | TFLOPs: 30.32 | +7: iteration 7260/ 21553 | consumed samples: 1858560 | consumed tokens: 3806330880 | elapsed time per iteration (s): 0.30 | learning rate: 1.558E-04 | global batch size: 256 | lm loss: 3.337790E+00 | grad norm: 0.394 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.225 | TFLOPs: 30.32 | +7: iteration 7270/ 21553 | consumed samples: 1861120 | consumed tokens: 3811573760 | elapsed time per iteration (s): 0.30 | learning rate: 1.557E-04 | global batch size: 256 | lm loss: 3.339134E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.937 | TFLOPs: 30.31 | +7: iteration 7280/ 21553 | consumed samples: 1863680 | consumed tokens: 3816816640 | elapsed time per iteration (s): 0.30 | learning rate: 1.556E-04 | global batch size: 256 | lm loss: 3.339439E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.977 | TFLOPs: 30.32 | +7: iteration 7290/ 21553 | consumed samples: 1866240 | consumed tokens: 3822059520 | elapsed time per iteration (s): 0.30 | learning rate: 1.554E-04 | 
global batch size: 256 | lm loss: 3.340366E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.582 | TFLOPs: 30.30 | +7: iteration 7300/ 21553 | consumed samples: 1868800 | consumed tokens: 3827302400 | elapsed time per iteration (s): 0.30 | learning rate: 1.553E-04 | global batch size: 256 | lm loss: 3.333847E+00 | grad norm: 0.466 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.867 | TFLOPs: 30.31 | +7: iteration 7310/ 21553 | consumed samples: 1871360 | consumed tokens: 3832545280 | elapsed time per iteration (s): 0.30 | learning rate: 1.552E-04 | global batch size: 256 | lm loss: 3.336816E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.025 | TFLOPs: 30.32 | +7: iteration 7320/ 21553 | consumed samples: 1873920 | consumed tokens: 3837788160 | elapsed time per iteration (s): 0.30 | learning rate: 1.551E-04 | global batch size: 256 | lm loss: 3.342729E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.691 | TFLOPs: 30.31 | +7: iteration 7330/ 21553 | consumed samples: 1876480 | consumed tokens: 3843031040 | elapsed time per iteration (s): 0.30 | learning rate: 1.550E-04 | global batch size: 256 | lm loss: 3.344669E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.223 | TFLOPs: 30.29 | +7: iteration 7340/ 21553 | consumed samples: 1879040 | consumed tokens: 3848273920 | elapsed time per iteration (s): 0.30 | learning rate: 1.549E-04 | global batch size: 256 | lm loss: 3.336671E+00 | grad norm: 0.375 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.623 | TFLOPs: 30.30 | +7: iteration 7350/ 21553 | consumed samples: 1881600 | consumed 
tokens: 3853516800 | elapsed time per iteration (s): 0.30 | learning rate: 1.547E-04 | global batch size: 256 | lm loss: 3.326626E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.657 | TFLOPs: 30.30 | +7: iteration 7360/ 21553 | consumed samples: 1884160 | consumed tokens: 3858759680 | elapsed time per iteration (s): 0.30 | learning rate: 1.546E-04 | global batch size: 256 | lm loss: 3.335643E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.883 | TFLOPs: 30.31 | +7: iteration 7370/ 21553 | consumed samples: 1886720 | consumed tokens: 3864002560 | elapsed time per iteration (s): 0.30 | learning rate: 1.545E-04 | global batch size: 256 | lm loss: 3.325109E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.000 | TFLOPs: 30.32 | +7: iteration 7380/ 21553 | consumed samples: 1889280 | consumed tokens: 3869245440 | elapsed time per iteration (s): 0.30 | learning rate: 1.544E-04 | global batch size: 256 | lm loss: 3.332584E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.617 | TFLOPs: 30.30 | +7: iteration 7390/ 21553 | consumed samples: 1891840 | consumed tokens: 3874488320 | elapsed time per iteration (s): 0.30 | learning rate: 1.543E-04 | global batch size: 256 | lm loss: 3.325877E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.843 | TFLOPs: 30.31 | +7: iteration 7400/ 21553 | consumed samples: 1894400 | consumed tokens: 3879731200 | elapsed time per iteration (s): 0.30 | learning rate: 1.542E-04 | global batch size: 256 | lm loss: 3.337451E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.034 
| TFLOPs: 30.32 | +7: iteration 7410/ 21553 | consumed samples: 1896960 | consumed tokens: 3884974080 | elapsed time per iteration (s): 0.30 | learning rate: 1.541E-04 | global batch size: 256 | lm loss: 3.339130E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.351 | TFLOPs: 30.29 | +7: iteration 7420/ 21553 | consumed samples: 1899520 | consumed tokens: 3890216960 | elapsed time per iteration (s): 0.30 | learning rate: 1.539E-04 | global batch size: 256 | lm loss: 3.341525E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.889 | TFLOPs: 30.31 | +7: iteration 7430/ 21553 | consumed samples: 1902080 | consumed tokens: 3895459840 | elapsed time per iteration (s): 0.30 | learning rate: 1.538E-04 | global batch size: 256 | lm loss: 3.340427E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.831 | TFLOPs: 30.31 | +7: iteration 7440/ 21553 | consumed samples: 1904640 | consumed tokens: 3900702720 | elapsed time per iteration (s): 0.30 | learning rate: 1.537E-04 | global batch size: 256 | lm loss: 3.336081E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.683 | TFLOPs: 30.34 | +7: iteration 7450/ 21553 | consumed samples: 1907200 | consumed tokens: 3905945600 | elapsed time per iteration (s): 0.30 | learning rate: 1.536E-04 | global batch size: 256 | lm loss: 3.325768E+00 | grad norm: 0.392 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.309 | TFLOPs: 30.33 | +7: iteration 7460/ 21553 | consumed samples: 1909760 | consumed tokens: 3911188480 | elapsed time per iteration (s): 0.30 | learning rate: 1.535E-04 | global batch size: 256 | lm loss: 3.329317E+00 | grad norm: 0.413 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.547 | TFLOPs: 30.34 | +7: iteration 7470/ 21553 | consumed samples: 1912320 | consumed tokens: 3916431360 | elapsed time per iteration (s): 0.30 | learning rate: 1.534E-04 | global batch size: 256 | lm loss: 3.321557E+00 | grad norm: 0.401 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.164 | TFLOPs: 30.32 | +7: iteration 7480/ 21553 | consumed samples: 1914880 | consumed tokens: 3921674240 | elapsed time per iteration (s): 0.30 | learning rate: 1.532E-04 | global batch size: 256 | lm loss: 3.329464E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.063 | TFLOPs: 30.32 | +7: iteration 7490/ 21553 | consumed samples: 1917440 | consumed tokens: 3926917120 | elapsed time per iteration (s): 0.30 | learning rate: 1.531E-04 | global batch size: 256 | lm loss: 3.328775E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.118 | TFLOPs: 30.32 | +7: iteration 7500/ 21553 | consumed samples: 1920000 | consumed tokens: 3932160000 | elapsed time per iteration (s): 0.30 | learning rate: 1.530E-04 | global batch size: 256 | lm loss: 3.333847E+00 | grad norm: 0.409 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.374 | TFLOPs: 30.33 | +7: iteration 7510/ 21553 | consumed samples: 1922560 | consumed tokens: 3937402880 | elapsed time per iteration (s): 0.30 | learning rate: 1.529E-04 | global batch size: 256 | lm loss: 3.322647E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.118 | TFLOPs: 30.32 | +7: iteration 7520/ 21553 | consumed samples: 1925120 | consumed tokens: 3942645760 | elapsed time per iteration (s): 0.30 | learning rate: 1.528E-04 | global batch 
size: 256 | lm loss: 3.320739E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.498 | TFLOPs: 30.33 | +7: iteration 7530/ 21553 | consumed samples: 1927680 | consumed tokens: 3947888640 | elapsed time per iteration (s): 0.30 | learning rate: 1.527E-04 | global batch size: 256 | lm loss: 3.338486E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.992 | TFLOPs: 30.32 | +7: iteration 7540/ 21553 | consumed samples: 1930240 | consumed tokens: 3953131520 | elapsed time per iteration (s): 0.30 | learning rate: 1.525E-04 | global batch size: 256 | lm loss: 3.327033E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.775 | TFLOPs: 30.31 | +7: iteration 7550/ 21553 | consumed samples: 1932800 | consumed tokens: 3958374400 | elapsed time per iteration (s): 0.30 | learning rate: 1.524E-04 | global batch size: 256 | lm loss: 3.310737E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.302 | TFLOPs: 30.33 | +7: iteration 7560/ 21553 | consumed samples: 1935360 | consumed tokens: 3963617280 | elapsed time per iteration (s): 0.30 | learning rate: 1.523E-04 | global batch size: 256 | lm loss: 3.333223E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.417 | TFLOPs: 30.30 | +7: iteration 7570/ 21553 | consumed samples: 1937920 | consumed tokens: 3968860160 | elapsed time per iteration (s): 0.30 | learning rate: 1.522E-04 | global batch size: 256 | lm loss: 3.309383E+00 | grad norm: 0.396 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.170 | TFLOPs: 30.32 | +7: iteration 7580/ 21553 | consumed samples: 1940480 | consumed tokens: 
3974103040 | elapsed time per iteration (s): 0.30 | learning rate: 1.521E-04 | global batch size: 256 | lm loss: 3.332012E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.944 | TFLOPs: 30.31 | +7: iteration 7590/ 21553 | consumed samples: 1943040 | consumed tokens: 3979345920 | elapsed time per iteration (s): 0.30 | learning rate: 1.520E-04 | global batch size: 256 | lm loss: 3.311606E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.807 | TFLOPs: 30.31 | +7: iteration 7600/ 21553 | consumed samples: 1945600 | consumed tokens: 3984588800 | elapsed time per iteration (s): 0.30 | learning rate: 1.518E-04 | global batch size: 256 | lm loss: 3.326223E+00 | grad norm: 0.471 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.914 | TFLOPs: 30.31 | +7: iteration 7610/ 21553 | consumed samples: 1948160 | consumed tokens: 3989831680 | elapsed time per iteration (s): 0.30 | learning rate: 1.517E-04 | global batch size: 256 | lm loss: 3.329669E+00 | grad norm: 0.406 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.590 | TFLOPs: 30.30 | +7: iteration 7620/ 21553 | consumed samples: 1950720 | consumed tokens: 3995074560 | elapsed time per iteration (s): 0.30 | learning rate: 1.516E-04 | global batch size: 256 | lm loss: 3.316247E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.507 | TFLOPs: 30.30 | +7: iteration 7630/ 21553 | consumed samples: 1953280 | consumed tokens: 4000317440 | elapsed time per iteration (s): 0.30 | learning rate: 1.515E-04 | global batch size: 256 | lm loss: 3.306619E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.931 | 
TFLOPs: 30.31 | +7: iteration 7640/ 21553 | consumed samples: 1955840 | consumed tokens: 4005560320 | elapsed time per iteration (s): 0.30 | learning rate: 1.514E-04 | global batch size: 256 | lm loss: 3.313565E+00 | grad norm: 0.403 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.239 | TFLOPs: 30.32 | +7: iteration 7650/ 21553 | consumed samples: 1958400 | consumed tokens: 4010803200 | elapsed time per iteration (s): 0.30 | learning rate: 1.513E-04 | global batch size: 256 | lm loss: 3.319066E+00 | grad norm: 0.401 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.020 | TFLOPs: 30.04 | +7: iteration 7660/ 21553 | consumed samples: 1960960 | consumed tokens: 4016046080 | elapsed time per iteration (s): 0.30 | learning rate: 1.511E-04 | global batch size: 256 | lm loss: 3.329999E+00 | grad norm: 0.402 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.277 | TFLOPs: 30.33 | +7: iteration 7670/ 21553 | consumed samples: 1963520 | consumed tokens: 4021288960 | elapsed time per iteration (s): 0.30 | learning rate: 1.510E-04 | global batch size: 256 | lm loss: 3.315310E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.526 | TFLOPs: 30.30 | +7: iteration 7680/ 21553 | consumed samples: 1966080 | consumed tokens: 4026531840 | elapsed time per iteration (s): 0.30 | learning rate: 1.509E-04 | global batch size: 256 | lm loss: 3.315243E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.758 | TFLOPs: 30.31 | +7: iteration 7690/ 21553 | consumed samples: 1968640 | consumed tokens: 4031774720 | elapsed time per iteration (s): 0.30 | learning rate: 1.508E-04 | global batch size: 256 | lm loss: 3.313873E+00 | grad norm: 0.479 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.718 | TFLOPs: 30.31 | +7: iteration 7700/ 21553 | consumed samples: 1971200 | consumed tokens: 4037017600 | elapsed time per iteration (s): 0.30 | learning rate: 1.507E-04 | global batch size: 256 | lm loss: 3.331824E+00 | grad norm: 0.403 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.947 | TFLOPs: 30.31 | +7: iteration 7710/ 21553 | consumed samples: 1973760 | consumed tokens: 4042260480 | elapsed time per iteration (s): 0.30 | learning rate: 1.505E-04 | global batch size: 256 | lm loss: 3.319999E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.791 | TFLOPs: 30.31 | +7: iteration 7720/ 21553 | consumed samples: 1976320 | consumed tokens: 4047503360 | elapsed time per iteration (s): 0.30 | learning rate: 1.504E-04 | global batch size: 256 | lm loss: 3.304404E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.408 | TFLOPs: 30.33 | +7: iteration 7730/ 21553 | consumed samples: 1978880 | consumed tokens: 4052746240 | elapsed time per iteration (s): 0.30 | learning rate: 1.503E-04 | global batch size: 256 | lm loss: 3.311235E+00 | grad norm: 0.390 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.694 | TFLOPs: 30.31 | +7: iteration 7740/ 21553 | consumed samples: 1981440 | consumed tokens: 4057989120 | elapsed time per iteration (s): 0.30 | learning rate: 1.502E-04 | global batch size: 256 | lm loss: 3.312099E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.816 | TFLOPs: 30.31 | +7: iteration 7750/ 21553 | consumed samples: 1984000 | consumed tokens: 4063232000 | elapsed time per iteration (s): 0.30 | learning rate: 1.501E-04 | global batch 
size: 256 | lm loss: 3.325571E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.758 | TFLOPs: 30.31 | +7: iteration 7760/ 21553 | consumed samples: 1986560 | consumed tokens: 4068474880 | elapsed time per iteration (s): 0.30 | learning rate: 1.500E-04 | global batch size: 256 | lm loss: 3.292490E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.792 | TFLOPs: 30.31 | +7: iteration 7770/ 21553 | consumed samples: 1989120 | consumed tokens: 4073717760 | elapsed time per iteration (s): 0.30 | learning rate: 1.498E-04 | global batch size: 256 | lm loss: 3.303812E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.651 | TFLOPs: 30.30 | +7: iteration 7780/ 21553 | consumed samples: 1991680 | consumed tokens: 4078960640 | elapsed time per iteration (s): 0.30 | learning rate: 1.497E-04 | global batch size: 256 | lm loss: 3.314877E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.820 | TFLOPs: 30.31 | +7: iteration 7790/ 21553 | consumed samples: 1994240 | consumed tokens: 4084203520 | elapsed time per iteration (s): 0.30 | learning rate: 1.496E-04 | global batch size: 256 | lm loss: 3.304884E+00 | grad norm: 0.409 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.024 | TFLOPs: 30.32 | +7: iteration 7800/ 21553 | consumed samples: 1996800 | consumed tokens: 4089446400 | elapsed time per iteration (s): 0.30 | learning rate: 1.495E-04 | global batch size: 256 | lm loss: 3.317242E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.182 | TFLOPs: 30.25 | +7: iteration 7810/ 21553 | consumed samples: 1999360 | consumed tokens: 
4094689280 | elapsed time per iteration (s): 0.30 | learning rate: 1.494E-04 | global batch size: 256 | lm loss: 3.301004E+00 | grad norm: 0.392 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.175 | TFLOPs: 30.18 | +7: iteration 7820/ 21553 | consumed samples: 2001920 | consumed tokens: 4099932160 | elapsed time per iteration (s): 0.30 | learning rate: 1.492E-04 | global batch size: 256 | lm loss: 3.316288E+00 | grad norm: 0.398 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.679 | TFLOPs: 30.20 | +7: iteration 7830/ 21553 | consumed samples: 2004480 | consumed tokens: 4105175040 | elapsed time per iteration (s): 0.30 | learning rate: 1.491E-04 | global batch size: 256 | lm loss: 3.311219E+00 | grad norm: 0.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.736 | TFLOPs: 30.20 | +7: iteration 7840/ 21553 | consumed samples: 2007040 | consumed tokens: 4110417920 | elapsed time per iteration (s): 0.30 | learning rate: 1.490E-04 | global batch size: 256 | lm loss: 3.298698E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.445 | TFLOPs: 30.19 | +7: iteration 7850/ 21553 | consumed samples: 2009600 | consumed tokens: 4115660800 | elapsed time per iteration (s): 0.30 | learning rate: 1.489E-04 | global batch size: 256 | lm loss: 3.295402E+00 | grad norm: 0.403 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.217 | TFLOPs: 30.18 | +7: iteration 7860/ 21553 | consumed samples: 2012160 | consumed tokens: 4120903680 | elapsed time per iteration (s): 0.30 | learning rate: 1.488E-04 | global batch size: 256 | lm loss: 3.293948E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.250 | 
TFLOPs: 30.18 | +7: iteration 7870/ 21553 | consumed samples: 2014720 | consumed tokens: 4126146560 | elapsed time per iteration (s): 0.30 | learning rate: 1.486E-04 | global batch size: 256 | lm loss: 3.299729E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.798 | TFLOPs: 30.17 | +7: iteration 7880/ 21553 | consumed samples: 2017280 | consumed tokens: 4131389440 | elapsed time per iteration (s): 0.30 | learning rate: 1.485E-04 | global batch size: 256 | lm loss: 3.322042E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.177 | TFLOPs: 30.18 | +7: iteration 7890/ 21553 | consumed samples: 2019840 | consumed tokens: 4136632320 | elapsed time per iteration (s): 0.30 | learning rate: 1.484E-04 | global batch size: 256 | lm loss: 3.319534E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.505 | TFLOPs: 30.19 | +7: iteration 7900/ 21553 | consumed samples: 2022400 | consumed tokens: 4141875200 | elapsed time per iteration (s): 0.30 | learning rate: 1.483E-04 | global batch size: 256 | lm loss: 3.303974E+00 | grad norm: 0.406 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.209 | TFLOPs: 30.22 | +7: iteration 7910/ 21553 | consumed samples: 2024960 | consumed tokens: 4147118080 | elapsed time per iteration (s): 0.30 | learning rate: 1.482E-04 | global batch size: 256 | lm loss: 3.297666E+00 | grad norm: 0.465 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.353 | TFLOPs: 30.33 | +7: iteration 7920/ 21553 | consumed samples: 2027520 | consumed tokens: 4152360960 | elapsed time per iteration (s): 0.30 | learning rate: 1.480E-04 | global batch size: 256 | lm loss: 3.290094E+00 | grad norm: 0.402 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.045 | TFLOPs: 30.32 | +7: iteration 7930/ 21553 | consumed samples: 2030080 | consumed tokens: 4157603840 | elapsed time per iteration (s): 0.30 | learning rate: 1.479E-04 | global batch size: 256 | lm loss: 3.301196E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.305 | TFLOPs: 30.33 | +7: iteration 7940/ 21553 | consumed samples: 2032640 | consumed tokens: 4162846720 | elapsed time per iteration (s): 0.30 | learning rate: 1.478E-04 | global batch size: 256 | lm loss: 3.309792E+00 | grad norm: 0.396 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.652 | TFLOPs: 29.74 | +7: iteration 7950/ 21553 | consumed samples: 2035200 | consumed tokens: 4168089600 | elapsed time per iteration (s): 0.30 | learning rate: 1.477E-04 | global batch size: 256 | lm loss: 3.294992E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.490 | TFLOPs: 30.33 | +7: iteration 7960/ 21553 | consumed samples: 2037760 | consumed tokens: 4173332480 | elapsed time per iteration (s): 0.30 | learning rate: 1.476E-04 | global batch size: 256 | lm loss: 3.307435E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.489 | TFLOPs: 30.33 | +7: iteration 7970/ 21553 | consumed samples: 2040320 | consumed tokens: 4178575360 | elapsed time per iteration (s): 0.30 | learning rate: 1.474E-04 | global batch size: 256 | lm loss: 3.293413E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.323 | TFLOPs: 30.29 | +7: iteration 7980/ 21553 | consumed samples: 2042880 | consumed tokens: 4183818240 | elapsed time per iteration (s): 0.30 | learning rate: 1.473E-04 | global batch 
size: 256 | lm loss: 3.298030E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.855 | TFLOPs: 30.35 | +7: iteration 7990/ 21553 | consumed samples: 2045440 | consumed tokens: 4189061120 | elapsed time per iteration (s): 0.30 | learning rate: 1.472E-04 | global batch size: 256 | lm loss: 3.299762E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.542 | TFLOPs: 30.02 | +0: [2023-03-13 23:58:11,831] [INFO] [logging.py:68:log_dist] [Rank 0] step=8000, skipped=0, lr=[0.00014708123251590496, 0.00014708123251590496, 0.00014708123251590496], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 8000/ 21553 | consumed samples: 2048000 | consumed tokens: 4194304000 | elapsed time per iteration (s): 0.30 | learning rate: 1.471E-04 | global batch size: 256 | lm loss: 3.282688E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.747 | TFLOPs: 30.31 | +0: steps: 8000 loss: 3.3056 iter time (s): 0.294 samples/sec: 871.494 +7: ----------------------------------------------------------------------------------------------- +7: validation loss at iteration 8000 | lm loss value: 3.779678E+00 | lm loss PPL: 4.380195E+01 | +7: ----------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 8000 to checkpoints_146m14b100m +0: [2023-03-13 23:58:11,952] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step8000 is begin to save! +0: [2023-03-13 23:58:11,955] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_01-model_00-model_states.pt... +0: [2023-03-13 23:58:12,042] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_01-model_00-model_states.pt. 
+0: [2023-03-13 23:58:12,042] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_03-model_00-model_states.pt... +0: [2023-03-13 23:58:12,057] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_03-model_00-model_states.pt. +0: [2023-03-13 23:58:12,058] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_04-model_00-model_states.pt... +0: [2023-03-13 23:58:12,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_04-model_00-model_states.pt. +0: [2023-03-13 23:58:12,073] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_05-model_00-model_states.pt... +0: [2023-03-13 23:58:12,087] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_05-model_00-model_states.pt. +0: [2023-03-13 23:58:12,088] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_06-model_00-model_states.pt... +0: [2023-03-13 23:58:12,102] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_06-model_00-model_states.pt. +0: [2023-03-13 23:58:12,103] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_07-model_00-model_states.pt... +0: [2023-03-13 23:58:12,117] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_07-model_00-model_states.pt. +0: [2023-03-13 23:58:12,118] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_08-model_00-model_states.pt... +0: [2023-03-13 23:58:12,132] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_08-model_00-model_states.pt. 
+0: [2023-03-13 23:58:12,133] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_09-model_00-model_states.pt... +0: [2023-03-13 23:58:12,147] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_09-model_00-model_states.pt. +0: [2023-03-13 23:58:12,148] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_10-model_00-model_states.pt... +0: [2023-03-13 23:58:12,162] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_10-model_00-model_states.pt. +0: [2023-03-13 23:58:12,163] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_11-model_00-model_states.pt... +0: [2023-03-13 23:58:12,177] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_11-model_00-model_states.pt. +0: [2023-03-13 23:58:12,178] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_12-model_00-model_states.pt... +0: [2023-03-13 23:58:12,192] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_12-model_00-model_states.pt. +0: [2023-03-13 23:58:12,193] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_13-model_00-model_states.pt... +0: [2023-03-13 23:58:12,207] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_13-model_00-model_states.pt. +0: [2023-03-13 23:58:12,208] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_14-model_00-model_states.pt... +0: [2023-03-13 23:58:12,222] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_14-model_00-model_states.pt. 
+0: [2023-03-13 23:58:12,223] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_15-model_00-model_states.pt... +0: [2023-03-13 23:58:12,237] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_15-model_00-model_states.pt. +0: [2023-03-13 23:58:12,238] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_16-model_00-model_states.pt... +0: [2023-03-13 23:58:12,253] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_16-model_00-model_states.pt. +0: [2023-03-13 23:58:12,253] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_17-model_00-model_states.pt... +0: [2023-03-13 23:58:12,268] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_17-model_00-model_states.pt. +0: [2023-03-13 23:58:12,268] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/layer_19-model_00-model_states.pt... +0: [2023-03-13 23:58:12,269] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/layer_19-model_00-model_states.pt. +0: [2023-03-13 23:58:12,269] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step8000/mp_rank_00_model_states.pt +0: [2023-03-13 23:58:12,269] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/mp_rank_00_model_states.pt... +0: [2023-03-13 23:58:12,271] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/mp_rank_00_model_states.pt. +0: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... 
+0: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... 
+2: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... 
+6: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... 
+3: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... 
+2: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... 
+5: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-13 23:58:12,288] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +0: [2023-03-13 23:58:12,319] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:58:12,320] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:58:12,320] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-13 23:58:12,320] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2023-03-13 23:58:12,321] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:58:12,321] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-13 23:58:12,321] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2023-03-13 23:58:12,321] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:58:12,322] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-13 23:58:12,322] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2023-03-13 23:58:12,323] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:58:12,323] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-13 23:58:12,323] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2023-03-13 23:58:12,323] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:58:12,323] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-13 23:58:12,323] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2023-03-13 23:58:12,324] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:58:12,324] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-13 23:58:12,324] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2023-03-13 23:58:12,325] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:58:12,325] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-13 23:58:12,325] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2023-03-13 23:58:12,325] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:58:12,325] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-13 23:58:12,325] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2023-03-13 23:58:12,325] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:58:12,325] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:58:12,326] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +1: [2023-03-13 23:58:12,326] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:58:12,326] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +0: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2023-03-13 23:58:12,323] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. 
+0: [2023-03-13 23:58:12,326] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-13 23:58:12,326] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2023-03-13 23:58:12,323] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-13 23:58:12,323] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2023-03-13 23:58:12,326] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +5: [2023-03-13 23:58:12,325] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. 
+0: [2023-03-13 23:58:12,326] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +5: [2023-03-13 23:58:12,325] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +0: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2023-03-13 23:58:12,325] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2023-03-13 23:58:12,325] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:58:12,325] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-13 23:58:12,325] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2023-03-13 23:58:12,325] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:58:12,325] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +7: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:58:12,325] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! 
+7: [2023-03-13 23:58:12,326] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:58:12,326] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:58:12,326] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2023-03-13 23:58:12,326] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! 
+6: [2023-03-13 23:58:12,327] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +2: [2023-03-13 23:58:12,326] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:58:12,327] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:58:12,327] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2023-03-13 23:58:12,327] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +1: [2023-03-13 23:58:12,327] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-13 23:58:12,327] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2023-03-13 23:58:12,327] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2023-03-13 23:58:12,327] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:58:12,327] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2023-03-13 23:58:12,327] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2023-03-13 23:58:12,322] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:58:12,322] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-13 23:58:12,322] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2023-03-13 23:58:12,327] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:58:12,327] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-13 23:58:12,327] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2023-03-13 23:58:12,327] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:58:12,327] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:58:12,327] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2023-03-13 23:58:12,327] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. 
+3: [2023-03-13 23:58:12,328] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2023-03-13 23:58:12,328] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:58:12,328] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:58:12,328] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:58:12,328] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:58:12,328] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2023-03-13 23:58:12,328] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2023-03-13 23:58:12,329] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:58:12,329] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-13 23:58:12,329] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2023-03-13 23:58:12,329] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. 
+1: [2023-03-13 23:58:12,329] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-13 23:58:12,329] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2023-03-13 23:58:12,329] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:58:12,329] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:58:12,330] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-13 23:58:12,330] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2023-03-13 23:58:12,330] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +2: [2023-03-13 23:58:12,329] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:58:12,330] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2023-03-13 23:58:12,329] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:58:12,330] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-13 23:58:12,330] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! 
+6: [2023-03-13 23:58:12,330] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-13 23:58:12,330] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2023-03-13 23:58:12,331] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:58:12,331] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-13 23:58:12,331] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +2: [2023-03-13 23:58:12,331] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-03-13 23:58:12,331] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-13 23:58:12,331] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2023-03-13 23:58:12,332] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:58:12,332] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. 
+3: [2023-03-13 23:58:12,332] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-13 23:58:12,332] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-13 23:58:12,332] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2023-03-13 23:58:12,332] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2023-03-13 23:58:12,332] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-13 23:58:12,332] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-13 23:58:12,332] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +3: [2023-03-13 23:58:12,333] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-03-13 23:58:12,333] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-13 23:58:12,333] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2023-03-13 23:58:12,333] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. 
+1: [2023-03-13 23:58:12,334] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +5: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:58:12,334] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2023-03-13 23:58:12,334] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-13 23:58:12,334] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +5: [2023-03-13 23:58:12,328] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-13 23:58:12,328] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-13 23:58:12,328] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +1: [2023-03-13 23:58:12,334] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +5: [2023-03-13 23:58:12,332] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. 
+5: [2023-03-13 23:58:12,332] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-13 23:58:12,332] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2023-03-13 23:58:12,336] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:58:12,336] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:58:12,336] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:58:12,336] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-13 23:58:12,336] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-13 23:58:12,336] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-13 23:58:12,336] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2023-03-13 23:58:12,336] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2023-03-13 23:58:12,336] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2023-03-13 23:58:12,340] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. 
+7: [2023-03-13 23:58:12,340] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-13 23:58:12,340] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-13 23:58:12,340] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-13 23:58:12,340] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +7: [2023-03-13 23:58:12,340] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +0: [2023-03-13 23:58:12,349] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-13 23:58:12,350] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2023-03-13 23:58:12,355] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:58:12,355] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-13 23:58:12,355] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2023-03-13 23:58:12,356] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. 
+4: [2023-03-13 23:58:12,356] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-13 23:58:12,356] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +4: [2023-03-13 23:58:12,360] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +6: [2023-03-13 23:58:12,360] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +4: [2023-03-13 23:58:12,360] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +6: [2023-03-13 23:58:12,360] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step8000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +4: [2023-03-13 23:58:12,360] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! +6: [2023-03-13 23:58:12,360] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step8000 is ready now! 
+0: successfully saved checkpoint at iteration 8000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 412.84 +7: iteration 8010/ 21553 | consumed samples: 2050560 | consumed tokens: 4199546880 | elapsed time per iteration (s): 0.35 | learning rate: 1.470E-04 | global batch size: 256 | lm loss: 3.298768E+00 | grad norm: 0.469 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 735.975 | TFLOPs: 25.76 | +7: iteration 8020/ 21553 | consumed samples: 2053120 | consumed tokens: 4204789760 | elapsed time per iteration (s): 0.30 | learning rate: 1.468E-04 | global batch size: 256 | lm loss: 3.309700E+00 | grad norm: 0.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.136 | TFLOPs: 30.32 | +7: iteration 8030/ 21553 | consumed samples: 2055680 | consumed tokens: 4210032640 | elapsed time per iteration (s): 0.30 | learning rate: 1.467E-04 | global batch size: 256 | lm loss: 3.294949E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.913 | TFLOPs: 30.31 | +7: iteration 8040/ 21553 | consumed samples: 2058240 | consumed tokens: 4215275520 | elapsed time per iteration (s): 0.30 | learning rate: 1.466E-04 | global batch size: 256 | lm loss: 3.299760E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.690 | TFLOPs: 29.92 | +7: iteration 8050/ 21553 | consumed samples: 2060800 | consumed tokens: 4220518400 | elapsed time per iteration (s): 0.30 | learning rate: 1.465E-04 | global batch size: 256 | lm loss: 3.298135E+00 | grad norm: 0.487 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.062 | TFLOPs: 30.28 | +7: iteration 8060/ 21553 | consumed samples: 2063360 | consumed tokens: 4225761280 | elapsed time per iteration (s): 0.30 | learning rate: 
1.464E-04 | global batch size: 256 | lm loss: 3.303680E+00 | grad norm: 0.456 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.870 | TFLOPs: 30.14 | +7: iteration 8070/ 21553 | consumed samples: 2065920 | consumed tokens: 4231004160 | elapsed time per iteration (s): 0.30 | learning rate: 1.462E-04 | global batch size: 256 | lm loss: 3.295148E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.023 | TFLOPs: 29.83 | +7: iteration 8080/ 21553 | consumed samples: 2068480 | consumed tokens: 4236247040 | elapsed time per iteration (s): 0.30 | learning rate: 1.461E-04 | global batch size: 256 | lm loss: 3.283456E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.965 | TFLOPs: 30.25 | +7: iteration 8090/ 21553 | consumed samples: 2071040 | consumed tokens: 4241489920 | elapsed time per iteration (s): 0.30 | learning rate: 1.460E-04 | global batch size: 256 | lm loss: 3.283363E+00 | grad norm: 0.465 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.771 | TFLOPs: 30.17 | +7: iteration 8100/ 21553 | consumed samples: 2073600 | consumed tokens: 4246732800 | elapsed time per iteration (s): 0.30 | learning rate: 1.459E-04 | global batch size: 256 | lm loss: 3.298454E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.124 | TFLOPs: 30.25 | +7: iteration 8110/ 21553 | consumed samples: 2076160 | consumed tokens: 4251975680 | elapsed time per iteration (s): 0.30 | learning rate: 1.457E-04 | global batch size: 256 | lm loss: 3.292406E+00 | grad norm: 0.406 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.026 | TFLOPs: 30.07 | +7: iteration 8120/ 21553 | consumed samples: 2078720 | 
consumed tokens: 4257218560 | elapsed time per iteration (s): 0.30 | learning rate: 1.456E-04 | global batch size: 256 | lm loss: 3.290800E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.644 | TFLOPs: 30.30 | +7: iteration 8130/ 21553 | consumed samples: 2081280 | consumed tokens: 4262461440 | elapsed time per iteration (s): 0.30 | learning rate: 1.455E-04 | global batch size: 256 | lm loss: 3.314428E+00 | grad norm: 0.405 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.417 | TFLOPs: 30.19 | +7: iteration 8140/ 21553 | consumed samples: 2083840 | consumed tokens: 4267704320 | elapsed time per iteration (s): 0.30 | learning rate: 1.454E-04 | global batch size: 256 | lm loss: 3.296639E+00 | grad norm: 0.389 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.514 | TFLOPs: 30.19 | +7: iteration 8150/ 21553 | consumed samples: 2086400 | consumed tokens: 4272947200 | elapsed time per iteration (s): 0.30 | learning rate: 1.453E-04 | global batch size: 256 | lm loss: 3.280489E+00 | grad norm: 0.392 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.809 | TFLOPs: 30.13 | +7: iteration 8160/ 21553 | consumed samples: 2088960 | consumed tokens: 4278190080 | elapsed time per iteration (s): 0.30 | learning rate: 1.451E-04 | global batch size: 256 | lm loss: 3.281116E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.482 | TFLOPs: 30.30 | +7: iteration 8170/ 21553 | consumed samples: 2091520 | consumed tokens: 4283432960 | elapsed time per iteration (s): 0.30 | learning rate: 1.450E-04 | global batch size: 256 | lm loss: 3.292741E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 
865.049 | TFLOPs: 30.28 | +7: iteration 8180/ 21553 | consumed samples: 2094080 | consumed tokens: 4288675840 | elapsed time per iteration (s): 0.30 | learning rate: 1.449E-04 | global batch size: 256 | lm loss: 3.292595E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.745 | TFLOPs: 30.31 | +7: iteration 8190/ 21553 | consumed samples: 2096640 | consumed tokens: 4293918720 | elapsed time per iteration (s): 0.30 | learning rate: 1.448E-04 | global batch size: 256 | lm loss: 3.281200E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.149 | TFLOPs: 30.29 | +7: iteration 8200/ 21553 | consumed samples: 2099200 | consumed tokens: 4299161600 | elapsed time per iteration (s): 0.30 | learning rate: 1.447E-04 | global batch size: 256 | lm loss: 3.287414E+00 | grad norm: 0.393 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.540 | TFLOPs: 30.27 | +7: iteration 8210/ 21553 | consumed samples: 2101760 | consumed tokens: 4304404480 | elapsed time per iteration (s): 0.30 | learning rate: 1.445E-04 | global batch size: 256 | lm loss: 3.288660E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.327 | TFLOPs: 30.29 | +7: iteration 8220/ 21553 | consumed samples: 2104320 | consumed tokens: 4309647360 | elapsed time per iteration (s): 0.30 | learning rate: 1.444E-04 | global batch size: 256 | lm loss: 3.283033E+00 | grad norm: 0.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.289 | TFLOPs: 30.29 | +7: iteration 8230/ 21553 | consumed samples: 2106880 | consumed tokens: 4314890240 | elapsed time per iteration (s): 0.30 | learning rate: 1.443E-04 | global batch size: 256 | lm loss: 3.295356E+00 | grad norm: 0.414 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.140 | TFLOPs: 30.32 | +7: iteration 8240/ 21553 | consumed samples: 2109440 | consumed tokens: 4320133120 | elapsed time per iteration (s): 0.30 | learning rate: 1.442E-04 | global batch size: 256 | lm loss: 3.309613E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.647 | TFLOPs: 30.30 | +7: iteration 8250/ 21553 | consumed samples: 2112000 | consumed tokens: 4325376000 | elapsed time per iteration (s): 0.30 | learning rate: 1.440E-04 | global batch size: 256 | lm loss: 3.293387E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.597 | TFLOPs: 30.30 | +7: iteration 8260/ 21553 | consumed samples: 2114560 | consumed tokens: 4330618880 | elapsed time per iteration (s): 0.30 | learning rate: 1.439E-04 | global batch size: 256 | lm loss: 3.273822E+00 | grad norm: 0.394 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.644 | TFLOPs: 30.30 | +7: iteration 8270/ 21553 | consumed samples: 2117120 | consumed tokens: 4335861760 | elapsed time per iteration (s): 0.30 | learning rate: 1.438E-04 | global batch size: 256 | lm loss: 3.292039E+00 | grad norm: 0.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.384 | TFLOPs: 30.29 | +7: iteration 8280/ 21553 | consumed samples: 2119680 | consumed tokens: 4341104640 | elapsed time per iteration (s): 0.30 | learning rate: 1.437E-04 | global batch size: 256 | lm loss: 3.283155E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.790 | TFLOPs: 30.31 | +7: iteration 8290/ 21553 | consumed samples: 2122240 | consumed tokens: 4346347520 | elapsed time per iteration (s): 0.30 | learning rate: 1.435E-04 | 
global batch size: 256 | lm loss: 3.283530E+00 | grad norm: 0.401 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.450 | TFLOPs: 30.30 | +7: iteration 8300/ 21553 | consumed samples: 2124800 | consumed tokens: 4351590400 | elapsed time per iteration (s): 0.30 | learning rate: 1.434E-04 | global batch size: 256 | lm loss: 3.281065E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.127 | TFLOPs: 30.29 | +7: iteration 8310/ 21553 | consumed samples: 2127360 | consumed tokens: 4356833280 | elapsed time per iteration (s): 0.30 | learning rate: 1.433E-04 | global batch size: 256 | lm loss: 3.289111E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.293 | TFLOPs: 30.29 | +7: iteration 8320/ 21553 | consumed samples: 2129920 | consumed tokens: 4362076160 | elapsed time per iteration (s): 0.30 | learning rate: 1.432E-04 | global batch size: 256 | lm loss: 3.288894E+00 | grad norm: 0.387 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.133 | TFLOPs: 30.29 | +7: iteration 8330/ 21553 | consumed samples: 2132480 | consumed tokens: 4367319040 | elapsed time per iteration (s): 0.30 | learning rate: 1.431E-04 | global batch size: 256 | lm loss: 3.277795E+00 | grad norm: 0.459 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.074 | TFLOPs: 30.25 | +7: iteration 8340/ 21553 | consumed samples: 2135040 | consumed tokens: 4372561920 | elapsed time per iteration (s): 0.30 | learning rate: 1.429E-04 | global batch size: 256 | lm loss: 3.276365E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.221 | TFLOPs: 30.32 | +7: iteration 8350/ 21553 | consumed samples: 2137600 | consumed 
tokens: 4377804800 | elapsed time per iteration (s): 0.30 | learning rate: 1.428E-04 | global batch size: 256 | lm loss: 3.282812E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.640 | TFLOPs: 30.30 | +7: iteration 8360/ 21553 | consumed samples: 2140160 | consumed tokens: 4383047680 | elapsed time per iteration (s): 0.30 | learning rate: 1.427E-04 | global batch size: 256 | lm loss: 3.267043E+00 | grad norm: 0.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.628 | TFLOPs: 30.30 | +7: iteration 8370/ 21553 | consumed samples: 2142720 | consumed tokens: 4388290560 | elapsed time per iteration (s): 0.30 | learning rate: 1.426E-04 | global batch size: 256 | lm loss: 3.276036E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.793 | TFLOPs: 30.31 | +7: iteration 8380/ 21553 | consumed samples: 2145280 | consumed tokens: 4393533440 | elapsed time per iteration (s): 0.30 | learning rate: 1.424E-04 | global batch size: 256 | lm loss: 3.284905E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.072 | TFLOPs: 30.32 | +7: iteration 8390/ 21553 | consumed samples: 2147840 | consumed tokens: 4398776320 | elapsed time per iteration (s): 0.30 | learning rate: 1.423E-04 | global batch size: 256 | lm loss: 3.270508E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.913 | TFLOPs: 30.31 | +7: iteration 8400/ 21553 | consumed samples: 2150400 | consumed tokens: 4404019200 | elapsed time per iteration (s): 0.30 | learning rate: 1.422E-04 | global batch size: 256 | lm loss: 3.279391E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.233 
| TFLOPs: 30.32 | +7: iteration 8410/ 21553 | consumed samples: 2152960 | consumed tokens: 4409262080 | elapsed time per iteration (s): 0.30 | learning rate: 1.421E-04 | global batch size: 256 | lm loss: 3.291372E+00 | grad norm: 0.392 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.245 | TFLOPs: 30.32 | +7: iteration 8420/ 21553 | consumed samples: 2155520 | consumed tokens: 4414504960 | elapsed time per iteration (s): 0.30 | learning rate: 1.419E-04 | global batch size: 256 | lm loss: 3.280562E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.886 | TFLOPs: 30.31 | +7: iteration 8430/ 21553 | consumed samples: 2158080 | consumed tokens: 4419747840 | elapsed time per iteration (s): 0.30 | learning rate: 1.418E-04 | global batch size: 256 | lm loss: 3.289361E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.263 | TFLOPs: 30.33 | +7: iteration 8440/ 21553 | consumed samples: 2160640 | consumed tokens: 4424990720 | elapsed time per iteration (s): 0.30 | learning rate: 1.417E-04 | global batch size: 256 | lm loss: 3.281369E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.153 | TFLOPs: 30.32 | +7: iteration 8450/ 21553 | consumed samples: 2163200 | consumed tokens: 4430233600 | elapsed time per iteration (s): 0.30 | learning rate: 1.416E-04 | global batch size: 256 | lm loss: 3.277721E+00 | grad norm: 0.403 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.153 | TFLOPs: 30.32 | +7: iteration 8460/ 21553 | consumed samples: 2165760 | consumed tokens: 4435476480 | elapsed time per iteration (s): 0.30 | learning rate: 1.414E-04 | global batch size: 256 | lm loss: 3.283404E+00 | grad norm: 0.396 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.360 | TFLOPs: 30.33 | +7: iteration 8470/ 21553 | consumed samples: 2168320 | consumed tokens: 4440719360 | elapsed time per iteration (s): 0.30 | learning rate: 1.413E-04 | global batch size: 256 | lm loss: 3.273547E+00 | grad norm: 0.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.762 | TFLOPs: 30.31 | +7: iteration 8480/ 21553 | consumed samples: 2170880 | consumed tokens: 4445962240 | elapsed time per iteration (s): 0.30 | learning rate: 1.412E-04 | global batch size: 256 | lm loss: 3.274747E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.410 | TFLOPs: 30.33 | +7: iteration 8490/ 21553 | consumed samples: 2173440 | consumed tokens: 4451205120 | elapsed time per iteration (s): 0.30 | learning rate: 1.411E-04 | global batch size: 256 | lm loss: 3.288821E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.833 | TFLOPs: 30.31 | +7: iteration 8500/ 21553 | consumed samples: 2176000 | consumed tokens: 4456448000 | elapsed time per iteration (s): 0.30 | learning rate: 1.409E-04 | global batch size: 256 | lm loss: 3.275535E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.679 | TFLOPs: 30.31 | +7: iteration 8510/ 21553 | consumed samples: 2178560 | consumed tokens: 4461690880 | elapsed time per iteration (s): 0.30 | learning rate: 1.408E-04 | global batch size: 256 | lm loss: 3.277485E+00 | grad norm: 0.409 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.336 | TFLOPs: 30.29 | +7: iteration 8520/ 21553 | consumed samples: 2181120 | consumed tokens: 4466933760 | elapsed time per iteration (s): 0.30 | learning rate: 1.407E-04 | global batch 
size: 256 | lm loss: 3.273760E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.380 | TFLOPs: 30.29 | +7: iteration 8530/ 21553 | consumed samples: 2183680 | consumed tokens: 4472176640 | elapsed time per iteration (s): 0.30 | learning rate: 1.406E-04 | global batch size: 256 | lm loss: 3.263389E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.523 | TFLOPs: 30.30 | +7: iteration 8540/ 21553 | consumed samples: 2186240 | consumed tokens: 4477419520 | elapsed time per iteration (s): 0.30 | learning rate: 1.405E-04 | global batch size: 256 | lm loss: 3.273117E+00 | grad norm: 0.386 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.126 | TFLOPs: 30.29 | +7: iteration 8550/ 21553 | consumed samples: 2188800 | consumed tokens: 4482662400 | elapsed time per iteration (s): 0.30 | learning rate: 1.403E-04 | global batch size: 256 | lm loss: 3.275218E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.217 | TFLOPs: 30.29 | +7: iteration 8560/ 21553 | consumed samples: 2191360 | consumed tokens: 4487905280 | elapsed time per iteration (s): 0.30 | learning rate: 1.402E-04 | global batch size: 256 | lm loss: 3.286266E+00 | grad norm: 0.465 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.050 | TFLOPs: 30.32 | +7: iteration 8570/ 21553 | consumed samples: 2193920 | consumed tokens: 4493148160 | elapsed time per iteration (s): 0.30 | learning rate: 1.401E-04 | global batch size: 256 | lm loss: 3.277118E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.425 | TFLOPs: 30.30 | +7: iteration 8580/ 21553 | consumed samples: 2196480 | consumed tokens: 
4498391040 | elapsed time per iteration (s): 0.30 | learning rate: 1.400E-04 | global batch size: 256 | lm loss: 3.275737E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.559 | TFLOPs: 30.30 | +7: iteration 8590/ 21553 | consumed samples: 2199040 | consumed tokens: 4503633920 | elapsed time per iteration (s): 0.30 | learning rate: 1.398E-04 | global batch size: 256 | lm loss: 3.272387E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.665 | TFLOPs: 30.30 | +7: iteration 8600/ 21553 | consumed samples: 2201600 | consumed tokens: 4508876800 | elapsed time per iteration (s): 0.30 | learning rate: 1.397E-04 | global batch size: 256 | lm loss: 3.276133E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.778 | TFLOPs: 30.31 | +7: iteration 8610/ 21553 | consumed samples: 2204160 | consumed tokens: 4514119680 | elapsed time per iteration (s): 0.30 | learning rate: 1.396E-04 | global batch size: 256 | lm loss: 3.271662E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.500 | TFLOPs: 30.30 | +7: iteration 8620/ 21553 | consumed samples: 2206720 | consumed tokens: 4519362560 | elapsed time per iteration (s): 0.30 | learning rate: 1.395E-04 | global batch size: 256 | lm loss: 3.280005E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.660 | TFLOPs: 30.30 | +7: iteration 8630/ 21553 | consumed samples: 2209280 | consumed tokens: 4524605440 | elapsed time per iteration (s): 0.30 | learning rate: 1.393E-04 | global batch size: 256 | lm loss: 3.268227E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.954 | 
TFLOPs: 30.31 | +7: iteration 8640/ 21553 | consumed samples: 2211840 | consumed tokens: 4529848320 | elapsed time per iteration (s): 0.30 | learning rate: 1.392E-04 | global batch size: 256 | lm loss: 3.252410E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.085 | TFLOPs: 30.32 | +7: iteration 8650/ 21553 | consumed samples: 2214400 | consumed tokens: 4535091200 | elapsed time per iteration (s): 0.30 | learning rate: 1.391E-04 | global batch size: 256 | lm loss: 3.264294E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.860 | TFLOPs: 30.31 | +7: iteration 8660/ 21553 | consumed samples: 2216960 | consumed tokens: 4540334080 | elapsed time per iteration (s): 0.30 | learning rate: 1.389E-04 | global batch size: 256 | lm loss: 3.256121E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.837 | TFLOPs: 30.31 | +7: iteration 8670/ 21553 | consumed samples: 2219520 | consumed tokens: 4545576960 | elapsed time per iteration (s): 0.30 | learning rate: 1.388E-04 | global batch size: 256 | lm loss: 3.266857E+00 | grad norm: 0.467 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.775 | TFLOPs: 30.31 | +7: iteration 8680/ 21553 | consumed samples: 2222080 | consumed tokens: 4550819840 | elapsed time per iteration (s): 0.30 | learning rate: 1.387E-04 | global batch size: 256 | lm loss: 3.268015E+00 | grad norm: 0.397 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.141 | TFLOPs: 30.32 | +7: iteration 8690/ 21553 | consumed samples: 2224640 | consumed tokens: 4556062720 | elapsed time per iteration (s): 0.30 | learning rate: 1.386E-04 | global batch size: 256 | lm loss: 3.250715E+00 | grad norm: 0.445 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.533 | TFLOPs: 30.33 | +7: iteration 8700/ 21553 | consumed samples: 2227200 | consumed tokens: 4561305600 | elapsed time per iteration (s): 0.30 | learning rate: 1.384E-04 | global batch size: 256 | lm loss: 3.268430E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.715 | TFLOPs: 30.31 | +7: iteration 8710/ 21553 | consumed samples: 2229760 | consumed tokens: 4566548480 | elapsed time per iteration (s): 0.30 | learning rate: 1.383E-04 | global batch size: 256 | lm loss: 3.256006E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.048 | TFLOPs: 30.28 | +7: iteration 8720/ 21553 | consumed samples: 2232320 | consumed tokens: 4571791360 | elapsed time per iteration (s): 0.30 | learning rate: 1.382E-04 | global batch size: 256 | lm loss: 3.267304E+00 | grad norm: 0.390 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.893 | TFLOPs: 30.28 | +7: iteration 8730/ 21553 | consumed samples: 2234880 | consumed tokens: 4577034240 | elapsed time per iteration (s): 0.30 | learning rate: 1.381E-04 | global batch size: 256 | lm loss: 3.268130E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.704 | TFLOPs: 30.31 | +7: iteration 8740/ 21553 | consumed samples: 2237440 | consumed tokens: 4582277120 | elapsed time per iteration (s): 0.30 | learning rate: 1.379E-04 | global batch size: 256 | lm loss: 3.264398E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.065 | TFLOPs: 30.28 | +7: iteration 8750/ 21553 | consumed samples: 2240000 | consumed tokens: 4587520000 | elapsed time per iteration (s): 0.30 | learning rate: 1.378E-04 | global batch 
size: 256 | lm loss: 3.241438E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.810 | TFLOPs: 30.31 | +7: iteration 8760/ 21553 | consumed samples: 2242560 | consumed tokens: 4592762880 | elapsed time per iteration (s): 0.30 | learning rate: 1.377E-04 | global batch size: 256 | lm loss: 3.255291E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.356 | TFLOPs: 30.26 | +7: iteration 8770/ 21553 | consumed samples: 2245120 | consumed tokens: 4598005760 | elapsed time per iteration (s): 0.30 | learning rate: 1.376E-04 | global batch size: 256 | lm loss: 3.265577E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.613 | TFLOPs: 30.23 | +7: iteration 8780/ 21553 | consumed samples: 2247680 | consumed tokens: 4603248640 | elapsed time per iteration (s): 0.30 | learning rate: 1.374E-04 | global batch size: 256 | lm loss: 3.269961E+00 | grad norm: 0.398 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.030 | TFLOPs: 30.28 | +7: iteration 8790/ 21553 | consumed samples: 2250240 | consumed tokens: 4608491520 | elapsed time per iteration (s): 0.30 | learning rate: 1.373E-04 | global batch size: 256 | lm loss: 3.251582E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.272 | TFLOPs: 30.26 | +7: iteration 8800/ 21553 | consumed samples: 2252800 | consumed tokens: 4613734400 | elapsed time per iteration (s): 0.30 | learning rate: 1.372E-04 | global batch size: 256 | lm loss: 3.268047E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.469 | TFLOPs: 30.30 | +7: iteration 8810/ 21553 | consumed samples: 2255360 | consumed tokens: 
4618977280 | elapsed time per iteration (s): 0.30 | learning rate: 1.371E-04 | global batch size: 256 | lm loss: 3.262900E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.839 | TFLOPs: 30.31 | +7: iteration 8820/ 21553 | consumed samples: 2257920 | consumed tokens: 4624220160 | elapsed time per iteration (s): 0.30 | learning rate: 1.369E-04 | global batch size: 256 | lm loss: 3.265305E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.732 | TFLOPs: 30.31 | +7: iteration 8830/ 21553 | consumed samples: 2260480 | consumed tokens: 4629463040 | elapsed time per iteration (s): 0.30 | learning rate: 1.368E-04 | global batch size: 256 | lm loss: 3.252237E+00 | grad norm: 0.466 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.869 | TFLOPs: 30.31 | +7: iteration 8840/ 21553 | consumed samples: 2263040 | consumed tokens: 4634705920 | elapsed time per iteration (s): 0.30 | learning rate: 1.367E-04 | global batch size: 256 | lm loss: 3.256762E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.450 | TFLOPs: 30.30 | +7: iteration 8850/ 21553 | consumed samples: 2265600 | consumed tokens: 4639948800 | elapsed time per iteration (s): 0.30 | learning rate: 1.366E-04 | global batch size: 256 | lm loss: 3.254983E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.225 | TFLOPs: 30.29 | +7: iteration 8860/ 21553 | consumed samples: 2268160 | consumed tokens: 4645191680 | elapsed time per iteration (s): 0.30 | learning rate: 1.364E-04 | global batch size: 256 | lm loss: 3.262075E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.652 | 
TFLOPs: 30.30 | +7: iteration 8870/ 21553 | consumed samples: 2270720 | consumed tokens: 4650434560 | elapsed time per iteration (s): 0.30 | learning rate: 1.363E-04 | global batch size: 256 | lm loss: 3.269323E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.500 | TFLOPs: 30.30 | +7: iteration 8880/ 21553 | consumed samples: 2273280 | consumed tokens: 4655677440 | elapsed time per iteration (s): 0.30 | learning rate: 1.362E-04 | global batch size: 256 | lm loss: 3.255151E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.390 | TFLOPs: 30.29 | +7: iteration 8890/ 21553 | consumed samples: 2275840 | consumed tokens: 4660920320 | elapsed time per iteration (s): 0.30 | learning rate: 1.360E-04 | global batch size: 256 | lm loss: 3.258433E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.533 | TFLOPs: 30.30 | +7: iteration 8900/ 21553 | consumed samples: 2278400 | consumed tokens: 4666163200 | elapsed time per iteration (s): 0.30 | learning rate: 1.359E-04 | global batch size: 256 | lm loss: 3.274493E+00 | grad norm: 0.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.392 | TFLOPs: 30.29 | +7: iteration 8910/ 21553 | consumed samples: 2280960 | consumed tokens: 4671406080 | elapsed time per iteration (s): 0.30 | learning rate: 1.358E-04 | global batch size: 256 | lm loss: 3.271812E+00 | grad norm: 0.462 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.796 | TFLOPs: 30.27 | +7: iteration 8920/ 21553 | consumed samples: 2283520 | consumed tokens: 4676648960 | elapsed time per iteration (s): 0.30 | learning rate: 1.357E-04 | global batch size: 256 | lm loss: 3.261322E+00 | grad norm: 0.442 | num zeros: 0.0 | number of 
skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.757 | TFLOPs: 30.31 | +7: iteration 8930/ 21553 | consumed samples: 2286080 | consumed tokens: 4681891840 | elapsed time per iteration (s): 0.30 | learning rate: 1.355E-04 | global batch size: 256 | lm loss: 3.260102E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.536 | TFLOPs: 30.27 | +7: iteration 8940/ 21553 | consumed samples: 2288640 | consumed tokens: 4687134720 | elapsed time per iteration (s): 0.30 | learning rate: 1.354E-04 | global batch size: 256 | lm loss: 3.237811E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.221 | TFLOPs: 30.29 | +7: iteration 8950/ 21553 | consumed samples: 2291200 | consumed tokens: 4692377600 | elapsed time per iteration (s): 0.30 | learning rate: 1.353E-04 | global batch size: 256 | lm loss: 3.238836E+00 | grad norm: 0.405 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.545 | TFLOPs: 30.30 | +7: iteration 8960/ 21553 | consumed samples: 2293760 | consumed tokens: 4697620480 | elapsed time per iteration (s): 0.30 | learning rate: 1.352E-04 | global batch size: 256 | lm loss: 3.239138E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.782 | TFLOPs: 30.31 | +7: iteration 8970/ 21553 | consumed samples: 2296320 | consumed tokens: 4702863360 | elapsed time per iteration (s): 0.30 | learning rate: 1.350E-04 | global batch size: 256 | lm loss: 3.253060E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.171 | TFLOPs: 30.29 | +7: iteration 8980/ 21553 | consumed samples: 2298880 | consumed tokens: 4708106240 | elapsed time per iteration (s): 0.30 | learning rate: 1.349E-04 | global batch 
size: 256 | lm loss: 3.256074E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.112 | TFLOPs: 30.29 | +7: iteration 8990/ 21553 | consumed samples: 2301440 | consumed tokens: 4713349120 | elapsed time per iteration (s): 0.30 | learning rate: 1.348E-04 | global batch size: 256 | lm loss: 3.266744E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.810 | TFLOPs: 30.27 | +7: iteration 9000/ 21553 | consumed samples: 2304000 | consumed tokens: 4718592000 | elapsed time per iteration (s): 0.30 | learning rate: 1.346E-04 | global batch size: 256 | lm loss: 3.250921E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.805 | TFLOPs: 30.27 | +7: ----------------------------------------------------------------------------------------------- +7: validation loss at iteration 9000 | lm loss value: 3.729561E+00 | lm loss PPL: 4.166083E+01 | +7: ----------------------------------------------------------------------------------------------- +0: saving checkpoint at iteration 9000 to checkpoints_146m14b100m +0: [2023-03-14 00:03:08,414] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step9000 is begin to save! +0: [2023-03-14 00:03:08,417] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_01-model_00-model_states.pt... +0: [2023-03-14 00:03:08,502] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_01-model_00-model_states.pt. +0: [2023-03-14 00:03:08,502] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_03-model_00-model_states.pt... 
+0: [2023-03-14 00:03:08,517] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_03-model_00-model_states.pt. +0: [2023-03-14 00:03:08,517] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_04-model_00-model_states.pt... +0: [2023-03-14 00:03:08,532] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_04-model_00-model_states.pt. +0: [2023-03-14 00:03:08,532] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_05-model_00-model_states.pt... +0: [2023-03-14 00:03:08,547] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_05-model_00-model_states.pt. +0: [2023-03-14 00:03:08,547] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_06-model_00-model_states.pt... +0: [2023-03-14 00:03:08,562] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_06-model_00-model_states.pt. +0: [2023-03-14 00:03:08,562] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_07-model_00-model_states.pt... +0: [2023-03-14 00:03:08,577] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_07-model_00-model_states.pt. +0: [2023-03-14 00:03:08,577] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_08-model_00-model_states.pt... +0: [2023-03-14 00:03:08,592] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_08-model_00-model_states.pt. +0: [2023-03-14 00:03:08,592] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_09-model_00-model_states.pt... 
+0: [2023-03-14 00:03:08,607] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_09-model_00-model_states.pt. +0: [2023-03-14 00:03:08,607] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_10-model_00-model_states.pt... +0: [2023-03-14 00:03:08,622] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_10-model_00-model_states.pt. +0: [2023-03-14 00:03:08,622] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_11-model_00-model_states.pt... +0: [2023-03-14 00:03:08,637] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_11-model_00-model_states.pt. +0: [2023-03-14 00:03:08,637] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_12-model_00-model_states.pt... +0: [2023-03-14 00:03:08,652] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_12-model_00-model_states.pt. +0: [2023-03-14 00:03:08,652] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_13-model_00-model_states.pt... +0: [2023-03-14 00:03:08,667] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_13-model_00-model_states.pt. +0: [2023-03-14 00:03:08,667] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_14-model_00-model_states.pt... +0: [2023-03-14 00:03:08,682] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_14-model_00-model_states.pt. +0: [2023-03-14 00:03:08,682] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_15-model_00-model_states.pt... 
+0: [2023-03-14 00:03:08,697] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_15-model_00-model_states.pt. +0: [2023-03-14 00:03:08,697] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_16-model_00-model_states.pt... +0: [2023-03-14 00:03:08,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_16-model_00-model_states.pt. +0: [2023-03-14 00:03:08,712] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_17-model_00-model_states.pt... +0: [2023-03-14 00:03:08,727] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_17-model_00-model_states.pt. +0: [2023-03-14 00:03:08,727] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/layer_19-model_00-model_states.pt... +0: [2023-03-14 00:03:08,728] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/layer_19-model_00-model_states.pt. +0: [2023-03-14 00:03:08,729] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step9000/mp_rank_00_model_states.pt +0: [2023-03-14 00:03:08,729] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/mp_rank_00_model_states.pt... +0: [2023-03-14 00:03:08,731] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/mp_rank_00_model_states.pt. +0: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... 
+0: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... 
+1: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:03:08,748] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:03:08,780] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:03:08,781] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-14 00:03:08,781] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2023-03-14 00:03:08,781] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:03:08,779] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:03:08,781] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 00:03:08,781] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2023-03-14 00:03:08,779] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-14 00:03:08,779] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2023-03-14 00:03:08,781] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:03:08,781] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:03:08,781] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-14 00:03:08,781] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2023-03-14 00:03:08,782] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:03:08,782] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 00:03:08,782] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! 
+5: [2023-03-14 00:03:08,783] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:03:08,783] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 00:03:08,783] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2023-03-14 00:03:08,784] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:03:08,784] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 00:03:08,784] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2023-03-14 00:03:08,784] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:03:08,784] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 00:03:08,784] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2023-03-14 00:03:08,785] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:03:08,785] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:03:08,785] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 00:03:08,785] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 00:03:08,785] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2023-03-14 00:03:08,785] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2023-03-14 00:03:08,786] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:03:08,786] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 00:03:08,786] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2023-03-14 00:03:08,786] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:03:08,786] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 00:03:08,786] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:03:08,787] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2023-03-14 00:03:08,787] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:03:08,787] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-14 00:03:08,787] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2023-03-14 00:03:08,788] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:03:08,788] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-14 00:03:08,788] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2023-03-14 00:03:08,788] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:03:08,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-14 00:03:08,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2023-03-14 00:03:08,784] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:03:08,786] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:03:08,784] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2023-03-14 00:03:08,784] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2023-03-14 00:03:08,786] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:03:08,786] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-14 00:03:08,786] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! 
+3: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:03:08,787] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:03:08,786] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3: [2023-03-14 00:03:08,788] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +2: [2023-03-14 00:03:08,786] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2023-03-14 00:03:08,788] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:03:08,787] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:03:08,787] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:03:08,787] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2023-03-14 00:03:08,789] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:03:08,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 00:03:08,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2023-03-14 00:03:08,789] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:03:08,789] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 00:03:08,789] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2023-03-14 00:03:08,790] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:03:08,790] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2023-03-14 00:03:08,791] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2023-03-14 00:03:08,791] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:03:08,791] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:03:08,791] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +5: [2023-03-14 00:03:08,791] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +3: [2023-03-14 00:03:08,791] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2023-03-14 00:03:08,791] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2023-03-14 00:03:08,791] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:03:08,791] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-14 00:03:08,791] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2023-03-14 00:03:08,791] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:03:08,791] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-14 00:03:08,791] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2023-03-14 00:03:08,791] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:03:08,791] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-14 00:03:08,792] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2023-03-14 00:03:08,792] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:03:08,792] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:03:08,792] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 00:03:08,792] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-14 00:03:08,792] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2023-03-14 00:03:08,792] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2023-03-14 00:03:08,792] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:03:08,792] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-14 00:03:08,792] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:03:08,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +5: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +5: [2023-03-14 00:03:08,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:03:08,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +7: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:03:08,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-14 00:03:08,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +7: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:03:08,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2023-03-14 00:03:08,794] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:03:08,794] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:03:08,794] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-14 00:03:08,794] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 00:03:08,794] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2023-03-14 00:03:08,794] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2023-03-14 00:03:08,795] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:03:08,795] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 00:03:08,795] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2023-03-14 00:03:08,796] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:03:08,796] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-14 00:03:08,796] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2023-03-14 00:03:08,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:03:08,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 00:03:08,802] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +2: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2023-03-14 00:03:08,786] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:03:08,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-14 00:03:08,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2023-03-14 00:03:08,786] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +3: [2023-03-14 00:03:08,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:03:08,786] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2023-03-14 00:03:08,787] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 00:03:08,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +4: [2023-03-14 00:03:08,787] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3: [2023-03-14 00:03:08,804] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2023-03-14 00:03:08,788] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2023-03-14 00:03:08,791] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:03:08,791] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-14 00:03:08,791] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2023-03-14 00:03:08,797] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:03:08,797] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-14 00:03:08,797] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2023-03-14 00:03:08,797] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:03:08,797] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-14 00:03:08,798] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: [2023-03-14 00:03:08,805] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 00:03:08,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +3: [2023-03-14 00:03:08,808] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:03:08,808] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2023-03-14 00:03:08,808] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +1: [2023-03-14 00:03:08,809] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:03:08,809] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 00:03:08,809] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2023-03-14 00:03:08,813] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:03:08,813] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 00:03:08,813] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2023-03-14 00:03:08,815] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:03:08,815] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-14 00:03:08,815] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2023-03-14 00:03:08,818] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:03:08,818] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-14 00:03:08,818] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2023-03-14 00:03:08,818] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:03:08,818] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 00:03:08,818] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +4: [2023-03-14 00:03:08,819] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:03:08,819] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-14 00:03:08,819] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +6: [2023-03-14 00:03:08,822] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:03:08,822] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step9000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 00:03:08,822] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step9000 is ready now! +0: successfully saved checkpoint at iteration 9000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 412.65 +7: iteration 9010/ 21553 | consumed samples: 2306560 | consumed tokens: 4723834880 | elapsed time per iteration (s): 0.35 | learning rate: 1.345E-04 | global batch size: 256 | lm loss: 3.250571E+00 | grad norm: 0.485 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 740.424 | TFLOPs: 25.92 | +7: iteration 9020/ 21553 | consumed samples: 2309120 | consumed tokens: 4729077760 | elapsed time per iteration (s): 0.30 | learning rate: 1.344E-04 | global batch size: 256 | lm loss: 3.258308E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.519 | TFLOPs: 30.30 | +7: iteration 9030/ 21553 | consumed samples: 2311680 | consumed tokens: 4734320640 | elapsed time per iteration (s): 0.30 | learning rate: 1.343E-04 | global batch size: 256 | lm loss: 3.248545E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.216 | TFLOPs: 30.29 | +7: iteration 9040/ 21553 | 
consumed samples: 2314240 | consumed tokens: 4739563520 | elapsed time per iteration (s): 0.30 | learning rate: 1.341E-04 | global batch size: 256 | lm loss: 3.258706E+00 | grad norm: 0.402 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.061 | TFLOPs: 30.32 | +7: iteration 9050/ 21553 | consumed samples: 2316800 | consumed tokens: 4744806400 | elapsed time per iteration (s): 0.30 | learning rate: 1.340E-04 | global batch size: 256 | lm loss: 3.255415E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.209 | TFLOPs: 30.15 | +7: iteration 9060/ 21553 | consumed samples: 2319360 | consumed tokens: 4750049280 | elapsed time per iteration (s): 0.30 | learning rate: 1.339E-04 | global batch size: 256 | lm loss: 3.231957E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.764 | TFLOPs: 30.34 | +7: iteration 9070/ 21553 | consumed samples: 2321920 | consumed tokens: 4755292160 | elapsed time per iteration (s): 0.30 | learning rate: 1.338E-04 | global batch size: 256 | lm loss: 3.255163E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.684 | TFLOPs: 30.34 | +7: iteration 9080/ 21553 | consumed samples: 2324480 | consumed tokens: 4760535040 | elapsed time per iteration (s): 0.30 | learning rate: 1.336E-04 | global batch size: 256 | lm loss: 3.248133E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.142 | TFLOPs: 30.32 | +7: iteration 9090/ 21553 | consumed samples: 2327040 | consumed tokens: 4765777920 | elapsed time per iteration (s): 0.30 | learning rate: 1.335E-04 | global batch size: 256 | lm loss: 3.252998E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 866.589 | TFLOPs: 30.34 | +7: iteration 9100/ 21553 | consumed samples: 2329600 | consumed tokens: 4771020800 | elapsed time per iteration (s): 0.30 | learning rate: 1.334E-04 | global batch size: 256 | lm loss: 3.245920E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.640 | TFLOPs: 30.34 | +7: iteration 9110/ 21553 | consumed samples: 2332160 | consumed tokens: 4776263680 | elapsed time per iteration (s): 0.30 | learning rate: 1.332E-04 | global batch size: 256 | lm loss: 3.240838E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.437 | TFLOPs: 30.23 | +7: iteration 9120/ 21553 | consumed samples: 2334720 | consumed tokens: 4781506560 | elapsed time per iteration (s): 0.30 | learning rate: 1.331E-04 | global batch size: 256 | lm loss: 3.247672E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.194 | TFLOPs: 30.36 | +7: iteration 9130/ 21553 | consumed samples: 2337280 | consumed tokens: 4786749440 | elapsed time per iteration (s): 0.30 | learning rate: 1.330E-04 | global batch size: 256 | lm loss: 3.236797E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.090 | TFLOPs: 30.35 | +7: iteration 9140/ 21553 | consumed samples: 2339840 | consumed tokens: 4791992320 | elapsed time per iteration (s): 0.30 | learning rate: 1.329E-04 | global batch size: 256 | lm loss: 3.241863E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.892 | TFLOPs: 30.35 | +7: iteration 9150/ 21553 | consumed samples: 2342400 | consumed tokens: 4797235200 | elapsed time per iteration (s): 0.30 | learning rate: 1.327E-04 | global batch size: 256 | lm loss: 3.250653E+00 | 
grad norm: 0.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.361 | TFLOPs: 30.36 | +7: iteration 9160/ 21553 | consumed samples: 2344960 | consumed tokens: 4802478080 | elapsed time per iteration (s): 0.30 | learning rate: 1.326E-04 | global batch size: 256 | lm loss: 3.245869E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.498 | TFLOPs: 30.37 | +7: iteration 9170/ 21553 | consumed samples: 2347520 | consumed tokens: 4807720960 | elapsed time per iteration (s): 0.30 | learning rate: 1.325E-04 | global batch size: 256 | lm loss: 3.234009E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.409 | TFLOPs: 30.37 | +7: iteration 9180/ 21553 | consumed samples: 2350080 | consumed tokens: 4812963840 | elapsed time per iteration (s): 0.30 | learning rate: 1.323E-04 | global batch size: 256 | lm loss: 3.232050E+00 | grad norm: 0.409 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.701 | TFLOPs: 30.38 | +7: iteration 9190/ 21553 | consumed samples: 2352640 | consumed tokens: 4818206720 | elapsed time per iteration (s): 0.30 | learning rate: 1.322E-04 | global batch size: 256 | lm loss: 3.253571E+00 | grad norm: 0.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.300 | TFLOPs: 30.36 | +7: iteration 9200/ 21553 | consumed samples: 2355200 | consumed tokens: 4823449600 | elapsed time per iteration (s): 0.30 | learning rate: 1.321E-04 | global batch size: 256 | lm loss: 3.241899E+00 | grad norm: 0.459 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.316 | TFLOPs: 30.36 | +7: iteration 9210/ 21553 | consumed samples: 2357760 | consumed tokens: 4828692480 | elapsed time per iteration (s): 
0.30 | learning rate: 1.320E-04 | global batch size: 256 | lm loss: 3.238536E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.144 | TFLOPs: 30.36 | +7: iteration 9220/ 21553 | consumed samples: 2360320 | consumed tokens: 4833935360 | elapsed time per iteration (s): 0.30 | learning rate: 1.318E-04 | global batch size: 256 | lm loss: 3.244328E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.900 | TFLOPs: 30.35 | +7: iteration 9230/ 21553 | consumed samples: 2362880 | consumed tokens: 4839178240 | elapsed time per iteration (s): 0.29 | learning rate: 1.317E-04 | global batch size: 256 | lm loss: 3.238781E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.072 | TFLOPs: 30.39 | +7: iteration 9240/ 21553 | consumed samples: 2365440 | consumed tokens: 4844421120 | elapsed time per iteration (s): 0.29 | learning rate: 1.316E-04 | global batch size: 256 | lm loss: 3.241286E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.040 | TFLOPs: 30.39 | +7: iteration 9250/ 21553 | consumed samples: 2368000 | consumed tokens: 4849664000 | elapsed time per iteration (s): 0.29 | learning rate: 1.314E-04 | global batch size: 256 | lm loss: 3.243122E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.040 | TFLOPs: 30.39 | +7: iteration 9260/ 21553 | consumed samples: 2370560 | consumed tokens: 4854906880 | elapsed time per iteration (s): 0.30 | learning rate: 1.313E-04 | global batch size: 256 | lm loss: 3.230116E+00 | grad norm: 0.402 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.911 | TFLOPs: 30.35 | +7: iteration 9270/ 21553 | 
consumed samples: 2373120 | consumed tokens: 4860149760 | elapsed time per iteration (s): 0.30 | learning rate: 1.312E-04 | global batch size: 256 | lm loss: 3.238692E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.415 | TFLOPs: 30.33 | +7: iteration 9280/ 21553 | consumed samples: 2375680 | consumed tokens: 4865392640 | elapsed time per iteration (s): 0.30 | learning rate: 1.311E-04 | global batch size: 256 | lm loss: 3.229480E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.125 | TFLOPs: 30.32 | +7: iteration 9290/ 21553 | consumed samples: 2378240 | consumed tokens: 4870635520 | elapsed time per iteration (s): 0.30 | learning rate: 1.309E-04 | global batch size: 256 | lm loss: 3.246702E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.176 | TFLOPs: 30.32 | +7: iteration 9300/ 21553 | consumed samples: 2380800 | consumed tokens: 4875878400 | elapsed time per iteration (s): 0.30 | learning rate: 1.308E-04 | global batch size: 256 | lm loss: 3.238414E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.976 | TFLOPs: 30.32 | +7: iteration 9310/ 21553 | consumed samples: 2383360 | consumed tokens: 4881121280 | elapsed time per iteration (s): 0.30 | learning rate: 1.307E-04 | global batch size: 256 | lm loss: 3.234389E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.233 | TFLOPs: 30.32 | +7: iteration 9320/ 21553 | consumed samples: 2385920 | consumed tokens: 4886364160 | elapsed time per iteration (s): 0.30 | learning rate: 1.305E-04 | global batch size: 256 | lm loss: 3.226146E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 866.476 | TFLOPs: 30.33 | +7: iteration 9330/ 21553 | consumed samples: 2388480 | consumed tokens: 4891607040 | elapsed time per iteration (s): 0.30 | learning rate: 1.304E-04 | global batch size: 256 | lm loss: 3.245565E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.139 | TFLOPs: 30.32 | +7: iteration 9340/ 21553 | consumed samples: 2391040 | consumed tokens: 4896849920 | elapsed time per iteration (s): 0.30 | learning rate: 1.303E-04 | global batch size: 256 | lm loss: 3.243803E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.006 | TFLOPs: 30.32 | +7: iteration 9350/ 21553 | consumed samples: 2393600 | consumed tokens: 4902092800 | elapsed time per iteration (s): 0.30 | learning rate: 1.302E-04 | global batch size: 256 | lm loss: 3.224854E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.017 | TFLOPs: 30.32 | +7: iteration 9360/ 21553 | consumed samples: 2396160 | consumed tokens: 4907335680 | elapsed time per iteration (s): 0.30 | learning rate: 1.300E-04 | global batch size: 256 | lm loss: 3.247822E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.165 | TFLOPs: 30.32 | +7: iteration 9370/ 21553 | consumed samples: 2398720 | consumed tokens: 4912578560 | elapsed time per iteration (s): 0.30 | learning rate: 1.299E-04 | global batch size: 256 | lm loss: 3.236438E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.506 | TFLOPs: 30.33 | +7: iteration 9380/ 21553 | consumed samples: 2401280 | consumed tokens: 4917821440 | elapsed time per iteration (s): 0.30 | learning rate: 1.298E-04 | global batch size: 256 | lm loss: 3.242050E+00 | 
grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.911 | TFLOPs: 30.31 | +7: iteration 9390/ 21553 | consumed samples: 2403840 | consumed tokens: 4923064320 | elapsed time per iteration (s): 0.30 | learning rate: 1.296E-04 | global batch size: 256 | lm loss: 3.229359E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.635 | TFLOPs: 30.34 | +7: iteration 9400/ 21553 | consumed samples: 2406400 | consumed tokens: 4928307200 | elapsed time per iteration (s): 0.30 | learning rate: 1.295E-04 | global batch size: 256 | lm loss: 3.235857E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.627 | TFLOPs: 30.02 | +7: iteration 9410/ 21553 | consumed samples: 2408960 | consumed tokens: 4933550080 | elapsed time per iteration (s): 0.30 | learning rate: 1.294E-04 | global batch size: 256 | lm loss: 3.248798E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.462 | TFLOPs: 30.33 | +7: iteration 9420/ 21553 | consumed samples: 2411520 | consumed tokens: 4938792960 | elapsed time per iteration (s): 0.30 | learning rate: 1.293E-04 | global batch size: 256 | lm loss: 3.234136E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.380 | TFLOPs: 30.01 | +7: iteration 9430/ 21553 | consumed samples: 2414080 | consumed tokens: 4944035840 | elapsed time per iteration (s): 0.30 | learning rate: 1.291E-04 | global batch size: 256 | lm loss: 3.247964E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.425 | TFLOPs: 30.33 | +7: iteration 9440/ 21553 | consumed samples: 2416640 | consumed tokens: 4949278720 | elapsed time per iteration (s): 
0.30 | learning rate: 1.290E-04 | global batch size: 256 | lm loss: 3.222742E+00 | grad norm: 0.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.722 | TFLOPs: 30.31 | +7: iteration 9450/ 21553 | consumed samples: 2419200 | consumed tokens: 4954521600 | elapsed time per iteration (s): 0.30 | learning rate: 1.289E-04 | global batch size: 256 | lm loss: 3.228381E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.409 | TFLOPs: 30.33 | +7: iteration 9460/ 21553 | consumed samples: 2421760 | consumed tokens: 4959764480 | elapsed time per iteration (s): 0.31 | learning rate: 1.287E-04 | global batch size: 256 | lm loss: 3.245356E+00 | grad norm: 0.467 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.273 | TFLOPs: 28.93 | +7: iteration 9470/ 21553 | consumed samples: 2424320 | consumed tokens: 4965007360 | elapsed time per iteration (s): 0.33 | learning rate: 1.286E-04 | global batch size: 256 | lm loss: 3.236799E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 780.872 | TFLOPs: 27.34 | +7: iteration 9480/ 21553 | consumed samples: 2426880 | consumed tokens: 4970250240 | elapsed time per iteration (s): 0.33 | learning rate: 1.285E-04 | global batch size: 256 | lm loss: 3.240762E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 765.040 | TFLOPs: 26.78 | +7: iteration 9490/ 21553 | consumed samples: 2429440 | consumed tokens: 4975493120 | elapsed time per iteration (s): 0.30 | learning rate: 1.283E-04 | global batch size: 256 | lm loss: 3.221566E+00 | grad norm: 0.454 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.050 | TFLOPs: 30.35 | +7: iteration 9500/ 21553 | 
consumed samples: 2432000 | consumed tokens: 4980736000 | elapsed time per iteration (s): 0.30 | learning rate: 1.282E-04 | global batch size: 256 | lm loss: 3.226636E+00 | grad norm: 0.400 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.048 | TFLOPs: 30.32 | +7: iteration 9510/ 21553 | consumed samples: 2434560 | consumed tokens: 4985978880 | elapsed time per iteration (s): 0.30 | learning rate: 1.281E-04 | global batch size: 256 | lm loss: 3.230966E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.977 | TFLOPs: 30.32 | +7: iteration 9520/ 21553 | consumed samples: 2437120 | consumed tokens: 4991221760 | elapsed time per iteration (s): 0.30 | learning rate: 1.280E-04 | global batch size: 256 | lm loss: 3.218149E+00 | grad norm: 0.406 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.011 | TFLOPs: 30.35 | +7: iteration 9530/ 21553 | consumed samples: 2439680 | consumed tokens: 4996464640 | elapsed time per iteration (s): 0.30 | learning rate: 1.278E-04 | global batch size: 256 | lm loss: 3.235210E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.034 | TFLOPs: 30.35 | +7: iteration 9540/ 21553 | consumed samples: 2442240 | consumed tokens: 5001707520 | elapsed time per iteration (s): 0.30 | learning rate: 1.277E-04 | global batch size: 256 | lm loss: 3.226434E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.866 | TFLOPs: 30.35 | +7: iteration 9550/ 21553 | consumed samples: 2444800 | consumed tokens: 5006950400 | elapsed time per iteration (s): 0.30 | learning rate: 1.276E-04 | global batch size: 256 | lm loss: 3.222792E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 866.469 | TFLOPs: 30.33 | +7: iteration 9560/ 21553 | consumed samples: 2447360 | consumed tokens: 5012193280 | elapsed time per iteration (s): 0.30 | learning rate: 1.274E-04 | global batch size: 256 | lm loss: 3.233875E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.671 | TFLOPs: 30.34 | +7: iteration 9570/ 21553 | consumed samples: 2449920 | consumed tokens: 5017436160 | elapsed time per iteration (s): 0.30 | learning rate: 1.273E-04 | global batch size: 256 | lm loss: 3.227177E+00 | grad norm: 0.403 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.989 | TFLOPs: 30.35 | +7: iteration 9580/ 21553 | consumed samples: 2452480 | consumed tokens: 5022679040 | elapsed time per iteration (s): 0.30 | learning rate: 1.272E-04 | global batch size: 256 | lm loss: 3.214402E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.872 | TFLOPs: 30.35 | +7: iteration 9590/ 21553 | consumed samples: 2455040 | consumed tokens: 5027921920 | elapsed time per iteration (s): 0.30 | learning rate: 1.270E-04 | global batch size: 256 | lm loss: 3.227029E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.719 | TFLOPs: 30.34 | +7: iteration 9600/ 21553 | consumed samples: 2457600 | consumed tokens: 5033164800 | elapsed time per iteration (s): 0.30 | learning rate: 1.269E-04 | global batch size: 256 | lm loss: 3.234010E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.783 | TFLOPs: 30.34 | +7: iteration 9610/ 21553 | consumed samples: 2460160 | consumed tokens: 5038407680 | elapsed time per iteration (s): 0.30 | learning rate: 1.268E-04 | global batch size: 256 | lm loss: 3.225730E+00 | 
grad norm: 0.401 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.903 | TFLOPs: 30.35 | +7: iteration 9620/ 21553 | consumed samples: 2462720 | consumed tokens: 5043650560 | elapsed time per iteration (s): 0.30 | learning rate: 1.267E-04 | global batch size: 256 | lm loss: 3.222127E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.623 | TFLOPs: 30.06 | +7: iteration 9630/ 21553 | consumed samples: 2465280 | consumed tokens: 5048893440 | elapsed time per iteration (s): 0.30 | learning rate: 1.265E-04 | global batch size: 256 | lm loss: 3.227839E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.656 | TFLOPs: 30.34 | +7: iteration 9640/ 21553 | consumed samples: 2467840 | consumed tokens: 5054136320 | elapsed time per iteration (s): 0.30 | learning rate: 1.264E-04 | global batch size: 256 | lm loss: 3.223679E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.889 | TFLOPs: 30.35 | +7: iteration 9650/ 21553 | consumed samples: 2470400 | consumed tokens: 5059379200 | elapsed time per iteration (s): 0.30 | learning rate: 1.263E-04 | global batch size: 256 | lm loss: 3.224144E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.484 | TFLOPs: 30.37 | +7: iteration 9660/ 21553 | consumed samples: 2472960 | consumed tokens: 5064622080 | elapsed time per iteration (s): 0.30 | learning rate: 1.261E-04 | global batch size: 256 | lm loss: 3.214090E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.270 | TFLOPs: 30.36 | +7: iteration 9670/ 21553 | consumed samples: 2475520 | consumed tokens: 5069864960 | elapsed time per iteration (s): 
0.30 | learning rate: 1.260E-04 | global batch size: 256 | lm loss: 3.215639E+00 | grad norm: 0.469 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.157 | TFLOPs: 30.36 | +7: iteration 9680/ 21553 | consumed samples: 2478080 | consumed tokens: 5075107840 | elapsed time per iteration (s): 0.30 | learning rate: 1.259E-04 | global batch size: 256 | lm loss: 3.225824E+00 | grad norm: 0.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.600 | TFLOPs: 30.37 | +7: iteration 9690/ 21553 | consumed samples: 2480640 | consumed tokens: 5080350720 | elapsed time per iteration (s): 0.30 | learning rate: 1.257E-04 | global batch size: 256 | lm loss: 3.226527E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.347 | TFLOPs: 30.36 | +7: iteration 9700/ 21553 | consumed samples: 2483200 | consumed tokens: 5085593600 | elapsed time per iteration (s): 0.30 | learning rate: 1.256E-04 | global batch size: 256 | lm loss: 3.225555E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.385 | TFLOPs: 29.98 | +7: iteration 9710/ 21553 | consumed samples: 2485760 | consumed tokens: 5090836480 | elapsed time per iteration (s): 0.30 | learning rate: 1.255E-04 | global batch size: 256 | lm loss: 3.234605E+00 | grad norm: 0.460 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.316 | TFLOPs: 30.36 | +7: iteration 9720/ 21553 | consumed samples: 2488320 | consumed tokens: 5096079360 | elapsed time per iteration (s): 0.30 | learning rate: 1.254E-04 | global batch size: 256 | lm loss: 3.224392E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.532 | TFLOPs: 30.37 | +7: iteration 9730/ 21553 | 
consumed samples: 2490880 | consumed tokens: 5101322240 | elapsed time per iteration (s): 0.30 | learning rate: 1.252E-04 | global batch size: 256 | lm loss: 3.221471E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.502 | TFLOPs: 30.37 | +7: iteration 9740/ 21553 | consumed samples: 2493440 | consumed tokens: 5106565120 | elapsed time per iteration (s): 0.30 | learning rate: 1.251E-04 | global batch size: 256 | lm loss: 3.220256E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.366 | TFLOPs: 30.36 | +7: iteration 9750/ 21553 | consumed samples: 2496000 | consumed tokens: 5111808000 | elapsed time per iteration (s): 0.30 | learning rate: 1.250E-04 | global batch size: 256 | lm loss: 3.216846E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.607 | TFLOPs: 30.37 | +7: iteration 9760/ 21553 | consumed samples: 2498560 | consumed tokens: 5117050880 | elapsed time per iteration (s): 0.30 | learning rate: 1.248E-04 | global batch size: 256 | lm loss: 3.231262E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.433 | TFLOPs: 30.37 | +7: iteration 9770/ 21553 | consumed samples: 2501120 | consumed tokens: 5122293760 | elapsed time per iteration (s): 0.30 | learning rate: 1.247E-04 | global batch size: 256 | lm loss: 3.208435E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.450 | TFLOPs: 30.37 | +7: iteration 9780/ 21553 | consumed samples: 2503680 | consumed tokens: 5127536640 | elapsed time per iteration (s): 0.30 | learning rate: 1.246E-04 | global batch size: 256 | lm loss: 3.221466E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 867.521 | TFLOPs: 30.37 | +7: iteration 9790/ 21553 | consumed samples: 2506240 | consumed tokens: 5132779520 | elapsed time per iteration (s): 0.30 | learning rate: 1.244E-04 | global batch size: 256 | lm loss: 3.224276E+00 | grad norm: 0.400 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.599 | TFLOPs: 30.34 | +7: iteration 9800/ 21553 | consumed samples: 2508800 | consumed tokens: 5138022400 | elapsed time per iteration (s): 0.30 | learning rate: 1.243E-04 | global batch size: 256 | lm loss: 3.213341E+00 | grad norm: 0.400 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.703 | TFLOPs: 30.34 | +7: iteration 9810/ 21553 | consumed samples: 2511360 | consumed tokens: 5143265280 | elapsed time per iteration (s): 0.30 | learning rate: 1.242E-04 | global batch size: 256 | lm loss: 3.211435E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.711 | TFLOPs: 30.34 | +7: iteration 9820/ 21553 | consumed samples: 2513920 | consumed tokens: 5148508160 | elapsed time per iteration (s): 0.30 | learning rate: 1.240E-04 | global batch size: 256 | lm loss: 3.227490E+00 | grad norm: 0.406 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.786 | TFLOPs: 30.34 | +7: iteration 9830/ 21553 | consumed samples: 2516480 | consumed tokens: 5153751040 | elapsed time per iteration (s): 0.30 | learning rate: 1.239E-04 | global batch size: 256 | lm loss: 3.215143E+00 | grad norm: 0.460 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.082 | TFLOPs: 30.11 | +7: iteration 9840/ 21553 | consumed samples: 2519040 | consumed tokens: 5158993920 | elapsed time per iteration (s): 0.30 | learning rate: 1.238E-04 | global batch size: 256 | lm loss: 3.219446E+00 | 
grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.134 | TFLOPs: 30.36 | +7: iteration 9850/ 21553 | consumed samples: 2521600 | consumed tokens: 5164236800 | elapsed time per iteration (s): 0.30 | learning rate: 1.237E-04 | global batch size: 256 | lm loss: 3.232749E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.040 | TFLOPs: 30.35 | +7: iteration 9860/ 21553 | consumed samples: 2524160 | consumed tokens: 5169479680 | elapsed time per iteration (s): 0.30 | learning rate: 1.235E-04 | global batch size: 256 | lm loss: 3.210431E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.617 | TFLOPs: 30.34 | +7: iteration 9870/ 21553 | consumed samples: 2526720 | consumed tokens: 5174722560 | elapsed time per iteration (s): 0.30 | learning rate: 1.234E-04 | global batch size: 256 | lm loss: 3.219874E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.614 | TFLOPs: 30.34 | +7: iteration 9880/ 21553 | consumed samples: 2529280 | consumed tokens: 5179965440 | elapsed time per iteration (s): 0.30 | learning rate: 1.233E-04 | global batch size: 256 | lm loss: 3.216505E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.672 | TFLOPs: 30.34 | +7: iteration 9890/ 21553 | consumed samples: 2531840 | consumed tokens: 5185208320 | elapsed time per iteration (s): 0.30 | learning rate: 1.231E-04 | global batch size: 256 | lm loss: 3.202692E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.080 | TFLOPs: 30.35 | +7: iteration 9900/ 21553 | consumed samples: 2534400 | consumed tokens: 5190451200 | elapsed time per iteration (s): 
0.30 | learning rate: 1.230E-04 | global batch size: 256 | lm loss: 3.202989E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.813 | TFLOPs: 30.34 | +7: iteration 9910/ 21553 | consumed samples: 2536960 | consumed tokens: 5195694080 | elapsed time per iteration (s): 0.30 | learning rate: 1.229E-04 | global batch size: 256 | lm loss: 3.210956E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.431 | TFLOPs: 30.12 | +7: iteration 9920/ 21553 | consumed samples: 2539520 | consumed tokens: 5200936960 | elapsed time per iteration (s): 0.30 | learning rate: 1.227E-04 | global batch size: 256 | lm loss: 3.211708E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.599 | TFLOPs: 30.02 | +7: iteration 9930/ 21553 | consumed samples: 2542080 | consumed tokens: 5206179840 | elapsed time per iteration (s): 0.30 | learning rate: 1.226E-04 | global batch size: 256 | lm loss: 3.216629E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.720 | TFLOPs: 29.96 | +7: iteration 9940/ 21553 | consumed samples: 2544640 | consumed tokens: 5211422720 | elapsed time per iteration (s): 0.30 | learning rate: 1.225E-04 | global batch size: 256 | lm loss: 3.212878E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.842 | TFLOPs: 30.35 | +7: iteration 9950/ 21553 | consumed samples: 2547200 | consumed tokens: 5216665600 | elapsed time per iteration (s): 0.30 | learning rate: 1.223E-04 | global batch size: 256 | lm loss: 3.221687E+00 | grad norm: 0.403 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.168 | TFLOPs: 30.36 | +7: iteration 9960/ 21553 | 
consumed samples: 2549760 | consumed tokens: 5221908480 | elapsed time per iteration (s): 0.30 | learning rate: 1.222E-04 | global batch size: 256 | lm loss: 3.208241E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.443 | TFLOPs: 30.37 | +7: iteration 9970/ 21553 | consumed samples: 2552320 | consumed tokens: 5227151360 | elapsed time per iteration (s): 0.30 | learning rate: 1.221E-04 | global batch size: 256 | lm loss: 3.218339E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.660 | TFLOPs: 30.34 | +7: iteration 9980/ 21553 | consumed samples: 2554880 | consumed tokens: 5232394240 | elapsed time per iteration (s): 0.30 | learning rate: 1.219E-04 | global batch size: 256 | lm loss: 3.200151E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.287 | TFLOPs: 30.36 | +7: iteration 9990/ 21553 | consumed samples: 2557440 | consumed tokens: 5237637120 | elapsed time per iteration (s): 0.30 | learning rate: 1.218E-04 | global batch size: 256 | lm loss: 3.209400E+00 | grad norm: 0.499 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.115 | TFLOPs: 30.36 | +0: [2023-03-14 00:08:05,309] [INFO] [logging.py:68:log_dist] [Rank 0] step=10000, skipped=0, lr=[0.00012168458711439383, 0.00012168458711439383, 0.00012168458711439383], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 10000/ 21553 | consumed samples: 2560000 | consumed tokens: 5242880000 | elapsed time per iteration (s): 0.30 | learning rate: 1.217E-04 | global batch size: 256 | lm loss: 3.211357E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.154 | TFLOPs: 30.08 | +0: steps: 10000 loss: 3.1958 iter time (s): 0.294 samples/sec: 870.319 
+7: ------------------------------------------------------------------------------------------------ +7: validation loss at iteration 10000 | lm loss value: 3.695419E+00 | lm loss PPL: 4.026242E+01 | +7: ------------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 10000 to checkpoints_146m14b100m +0: [2023-03-14 00:08:05,429] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step10000 is begin to save! +0: [2023-03-14 00:08:05,431] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_01-model_00-model_states.pt... +0: [2023-03-14 00:08:05,515] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_01-model_00-model_states.pt. +0: [2023-03-14 00:08:05,515] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_03-model_00-model_states.pt... +0: [2023-03-14 00:08:05,531] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_03-model_00-model_states.pt. +0: [2023-03-14 00:08:05,531] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_04-model_00-model_states.pt... +0: [2023-03-14 00:08:05,545] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_04-model_00-model_states.pt. +0: [2023-03-14 00:08:05,546] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_05-model_00-model_states.pt... +0: [2023-03-14 00:08:05,560] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_05-model_00-model_states.pt. +0: [2023-03-14 00:08:05,561] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_06-model_00-model_states.pt... 
+0: [2023-03-14 00:08:05,575] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_06-model_00-model_states.pt. +0: [2023-03-14 00:08:05,576] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_07-model_00-model_states.pt... +0: [2023-03-14 00:08:05,590] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_07-model_00-model_states.pt. +0: [2023-03-14 00:08:05,590] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_08-model_00-model_states.pt... +0: [2023-03-14 00:08:05,605] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_08-model_00-model_states.pt. +0: [2023-03-14 00:08:05,605] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_09-model_00-model_states.pt... +0: [2023-03-14 00:08:05,620] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_09-model_00-model_states.pt. +0: [2023-03-14 00:08:05,620] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_10-model_00-model_states.pt... +0: [2023-03-14 00:08:05,635] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_10-model_00-model_states.pt. +0: [2023-03-14 00:08:05,635] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_11-model_00-model_states.pt... +0: [2023-03-14 00:08:05,650] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_11-model_00-model_states.pt. +0: [2023-03-14 00:08:05,650] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_12-model_00-model_states.pt... 
+0: [2023-03-14 00:08:05,665] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_12-model_00-model_states.pt. +0: [2023-03-14 00:08:05,665] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_13-model_00-model_states.pt... +0: [2023-03-14 00:08:05,680] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_13-model_00-model_states.pt. +0: [2023-03-14 00:08:05,680] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_14-model_00-model_states.pt... +0: [2023-03-14 00:08:05,695] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_14-model_00-model_states.pt. +0: [2023-03-14 00:08:05,695] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_15-model_00-model_states.pt... +0: [2023-03-14 00:08:05,709] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_15-model_00-model_states.pt. +0: [2023-03-14 00:08:05,710] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_16-model_00-model_states.pt... +0: [2023-03-14 00:08:05,724] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_16-model_00-model_states.pt. +0: [2023-03-14 00:08:05,725] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_17-model_00-model_states.pt... +0: [2023-03-14 00:08:05,739] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_17-model_00-model_states.pt. +0: [2023-03-14 00:08:05,739] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/layer_19-model_00-model_states.pt... 
+0: [2023-03-14 00:08:05,741] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/layer_19-model_00-model_states.pt. +0: [2023-03-14 00:08:05,741] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step10000/mp_rank_00_model_states.pt +0: [2023-03-14 00:08:05,741] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/mp_rank_00_model_states.pt... +0: [2023-03-14 00:08:05,743] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/mp_rank_00_model_states.pt. +0: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... 
+1: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... 
+7: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... 
+3: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:08:05,760] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:08:05,793] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:08:05,793] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 00:08:05,793] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +7: [2023-03-14 00:08:05,795] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:08:05,795] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-14 00:08:05,795] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2023-03-14 00:08:05,795] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:08:05,795] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-14 00:08:05,795] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! 
+0: [2023-03-14 00:08:05,796] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:08:05,797] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:08:05,797] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 00:08:05,797] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2023-03-14 00:08:05,796] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:08:05,796] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-14 00:08:05,797] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2023-03-14 00:08:05,798] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:08:05,798] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 00:08:05,798] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2023-03-14 00:08:05,796] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:08:05,799] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +6: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2023-03-14 00:08:05,799] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 00:08:05,799] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:08:05,799] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! 
+3: [2023-03-14 00:08:05,796] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +2: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:08:05,798] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:08:05,796] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2023-03-14 00:08:05,797] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:08:05,797] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2023-03-14 00:08:05,797] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2023-03-14 00:08:05,797] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:08:05,799] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3: [2023-03-14 00:08:05,798] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +2: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2023-03-14 00:08:05,798] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2023-03-14 00:08:05,799] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:08:05,799] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:08:05,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +2: [2023-03-14 00:08:05,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! 
+2: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:08:05,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +3: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2023-03-14 00:08:05,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:08:05,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +0: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! 
+0: [2023-03-14 00:08:05,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:08:05,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 00:08:05,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +7: [2023-03-14 00:08:05,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:08:05,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 00:08:05,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:08:05,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +7: [2023-03-14 00:08:05,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 00:08:05,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2023-03-14 00:08:05,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:08:05,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 00:08:05,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +7: [2023-03-14 00:08:05,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:08:05,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-14 00:08:05,801] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2023-03-14 00:08:05,801] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:08:05,801] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-14 00:08:05,802] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2023-03-14 00:08:05,802] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:08:05,802] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-14 00:08:05,802] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2023-03-14 00:08:05,802] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:08:05,802] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 00:08:05,802] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2023-03-14 00:08:05,803] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:08:05,803] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 00:08:05,803] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:08:05,802] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:08:05,802] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3: [2023-03-14 00:08:05,800] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +2: [2023-03-14 00:08:05,803] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2023-03-14 00:08:05,800] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2023-03-14 00:08:05,805] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:08:05,805] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 00:08:05,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2023-03-14 00:08:05,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:08:05,804] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 00:08:05,804] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2023-03-14 00:08:05,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:08:05,807] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-14 00:08:05,807] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +3: [2023-03-14 00:08:05,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:08:05,807] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-14 00:08:05,807] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +7: [2023-03-14 00:08:05,807] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:08:05,807] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-14 00:08:05,807] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2023-03-14 00:08:05,798] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-14 00:08:05,798] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:08:05,799] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-14 00:08:05,799] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2023-03-14 00:08:05,804] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:08:05,805] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-14 00:08:05,805] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:08:05,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! 
+4: [2023-03-14 00:08:05,805] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-14 00:08:05,805] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2023-03-14 00:08:05,806] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:08:05,806] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:08:05,806] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-14 00:08:05,806] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-14 00:08:05,806] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2023-03-14 00:08:05,806] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2023-03-14 00:08:05,809] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:08:05,809] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:08:05,809] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-14 00:08:05,809] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +7: [2023-03-14 00:08:05,809] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:08:05,809] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +4: [2023-03-14 00:08:05,809] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +7: [2023-03-14 00:08:05,809] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 00:08:05,809] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2023-03-14 00:08:05,811] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:08:05,811] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +6: [2023-03-14 00:08:05,811] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:08:05,811] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2023-03-14 00:08:05,811] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:08:05,811] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 00:08:05,811] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 00:08:05,811] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +6: [2023-03-14 00:08:05,811] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2023-03-14 00:08:05,811] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:08:05,811] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 00:08:05,812] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2023-03-14 00:08:05,812] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:08:05,812] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:08:05,812] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 00:08:05,812] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 00:08:05,812] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! 
+1: [2023-03-14 00:08:05,812] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2023-03-14 00:08:05,814] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:08:05,814] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 00:08:05,814] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2023-03-14 00:08:05,818] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:08:05,818] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2023-03-14 00:08:05,818] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2023-03-14 00:08:05,819] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:08:05,819] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 00:08:05,819] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2023-03-14 00:08:05,822] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:08:05,823] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-14 00:08:05,823] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +0: [2023-03-14 00:08:05,824] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 00:08:05,824] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2023-03-14 00:08:05,828] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:08:05,828] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 00:08:05,828] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +1: [2023-03-14 00:08:05,833] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:08:05,833] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-14 00:08:05,833] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2023-03-14 00:08:05,833] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:08:05,833] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-14 00:08:05,833] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +5: [2023-03-14 00:08:05,838] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:08:05,838] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 00:08:05,838] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2023-03-14 00:08:05,839] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:08:05,839] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-14 00:08:05,839] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! +2: [2023-03-14 00:08:05,844] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:08:05,844] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step10000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-14 00:08:05,844] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step10000 is ready now! 
+0: successfully saved checkpoint at iteration 10000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 421.21 +7: iteration 10010/ 21553 | consumed samples: 2562560 | consumed tokens: 5248122880 | elapsed time per iteration (s): 0.35 | learning rate: 1.216E-04 | global batch size: 256 | lm loss: 3.208581E+00 | grad norm: 0.466 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 731.741 | TFLOPs: 25.62 | +7: iteration 10020/ 21553 | consumed samples: 2565120 | consumed tokens: 5253365760 | elapsed time per iteration (s): 0.30 | learning rate: 1.214E-04 | global batch size: 256 | lm loss: 3.221873E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.743 | TFLOPs: 30.34 | +7: iteration 10030/ 21553 | consumed samples: 2567680 | consumed tokens: 5258608640 | elapsed time per iteration (s): 0.30 | learning rate: 1.213E-04 | global batch size: 256 | lm loss: 3.207655E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.802 | TFLOPs: 30.34 | +7: iteration 10040/ 21553 | consumed samples: 2570240 | consumed tokens: 5263851520 | elapsed time per iteration (s): 0.30 | learning rate: 1.212E-04 | global batch size: 256 | lm loss: 3.203121E+00 | grad norm: 0.400 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.457 | TFLOPs: 30.33 | +7: iteration 10050/ 21553 | consumed samples: 2572800 | consumed tokens: 5269094400 | elapsed time per iteration (s): 0.30 | learning rate: 1.210E-04 | global batch size: 256 | lm loss: 3.201468E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.789 | TFLOPs: 30.34 | +7: iteration 10060/ 21553 | consumed samples: 2575360 | consumed tokens: 5274337280 | elapsed time per iteration (s): 0.30 | learning 
rate: 1.209E-04 | global batch size: 256 | lm loss: 3.216793E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.675 | TFLOPs: 30.30 | +7: iteration 10070/ 21553 | consumed samples: 2577920 | consumed tokens: 5279580160 | elapsed time per iteration (s): 0.30 | learning rate: 1.208E-04 | global batch size: 256 | lm loss: 3.201749E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.459 | TFLOPs: 30.05 | +7: iteration 10080/ 21553 | consumed samples: 2580480 | consumed tokens: 5284823040 | elapsed time per iteration (s): 0.30 | learning rate: 1.206E-04 | global batch size: 256 | lm loss: 3.206656E+00 | grad norm: 0.403 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.526 | TFLOPs: 30.09 | +7: iteration 10090/ 21553 | consumed samples: 2583040 | consumed tokens: 5290065920 | elapsed time per iteration (s): 0.30 | learning rate: 1.205E-04 | global batch size: 256 | lm loss: 3.205091E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.366 | TFLOPs: 30.36 | +7: iteration 10100/ 21553 | consumed samples: 2585600 | consumed tokens: 5295308800 | elapsed time per iteration (s): 0.30 | learning rate: 1.204E-04 | global batch size: 256 | lm loss: 3.205267E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.653 | TFLOPs: 30.30 | +7: iteration 10110/ 21553 | consumed samples: 2588160 | consumed tokens: 5300551680 | elapsed time per iteration (s): 0.30 | learning rate: 1.202E-04 | global batch size: 256 | lm loss: 3.212170E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.277 | TFLOPs: 30.29 | +7: iteration 10120/ 21553 | consumed samples: 
2590720 | consumed tokens: 5305794560 | elapsed time per iteration (s): 0.30 | learning rate: 1.201E-04 | global batch size: 256 | lm loss: 3.207676E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.791 | TFLOPs: 30.31 | +7: iteration 10130/ 21553 | consumed samples: 2593280 | consumed tokens: 5311037440 | elapsed time per iteration (s): 0.30 | learning rate: 1.200E-04 | global batch size: 256 | lm loss: 3.207841E+00 | grad norm: 0.459 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.215 | TFLOPs: 30.29 | +7: iteration 10140/ 21553 | consumed samples: 2595840 | consumed tokens: 5316280320 | elapsed time per iteration (s): 0.30 | learning rate: 1.198E-04 | global batch size: 256 | lm loss: 3.196095E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.633 | TFLOPs: 30.34 | +7: iteration 10150/ 21553 | consumed samples: 2598400 | consumed tokens: 5321523200 | elapsed time per iteration (s): 0.30 | learning rate: 1.197E-04 | global batch size: 256 | lm loss: 3.216744E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.013 | TFLOPs: 30.32 | +7: iteration 10160/ 21553 | consumed samples: 2600960 | consumed tokens: 5326766080 | elapsed time per iteration (s): 0.30 | learning rate: 1.196E-04 | global batch size: 256 | lm loss: 3.208045E+00 | grad norm: 0.478 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.047 | TFLOPs: 30.25 | +7: iteration 10170/ 21553 | consumed samples: 2603520 | consumed tokens: 5332008960 | elapsed time per iteration (s): 0.30 | learning rate: 1.194E-04 | global batch size: 256 | lm loss: 3.204705E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | 
samples per second: 866.640 | TFLOPs: 30.34 | +7: iteration 10180/ 21553 | consumed samples: 2606080 | consumed tokens: 5337251840 | elapsed time per iteration (s): 0.30 | learning rate: 1.193E-04 | global batch size: 256 | lm loss: 3.195555E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.729 | TFLOPs: 30.34 | +7: iteration 10190/ 21553 | consumed samples: 2608640 | consumed tokens: 5342494720 | elapsed time per iteration (s): 0.30 | learning rate: 1.192E-04 | global batch size: 256 | lm loss: 3.207802E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.961 | TFLOPs: 30.35 | +7: iteration 10200/ 21553 | consumed samples: 2611200 | consumed tokens: 5347737600 | elapsed time per iteration (s): 0.30 | learning rate: 1.191E-04 | global batch size: 256 | lm loss: 3.208031E+00 | grad norm: 0.389 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.094 | TFLOPs: 30.35 | +7: iteration 10210/ 21553 | consumed samples: 2613760 | consumed tokens: 5352980480 | elapsed time per iteration (s): 0.30 | learning rate: 1.189E-04 | global batch size: 256 | lm loss: 3.205986E+00 | grad norm: 0.396 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.690 | TFLOPs: 30.34 | +7: iteration 10220/ 21553 | consumed samples: 2616320 | consumed tokens: 5358223360 | elapsed time per iteration (s): 0.30 | learning rate: 1.188E-04 | global batch size: 256 | lm loss: 3.195582E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.614 | TFLOPs: 30.34 | +7: iteration 10230/ 21553 | consumed samples: 2618880 | consumed tokens: 5363466240 | elapsed time per iteration (s): 0.30 | learning rate: 1.187E-04 | global batch size: 256 | lm loss: 3.200570E+00 | grad norm: 
0.467 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.054 | TFLOPs: 30.35 | +7: iteration 10240/ 21553 | consumed samples: 2621440 | consumed tokens: 5368709120 | elapsed time per iteration (s): 0.30 | learning rate: 1.185E-04 | global batch size: 256 | lm loss: 3.197876E+00 | grad norm: 0.462 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.717 | TFLOPs: 30.34 | +7: iteration 10250/ 21553 | consumed samples: 2624000 | consumed tokens: 5373952000 | elapsed time per iteration (s): 0.30 | learning rate: 1.184E-04 | global batch size: 256 | lm loss: 3.204136E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.537 | TFLOPs: 30.34 | +7: iteration 10260/ 21553 | consumed samples: 2626560 | consumed tokens: 5379194880 | elapsed time per iteration (s): 0.30 | learning rate: 1.183E-04 | global batch size: 256 | lm loss: 3.209488E+00 | grad norm: 0.409 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.757 | TFLOPs: 29.96 | +7: iteration 10270/ 21553 | consumed samples: 2629120 | consumed tokens: 5384437760 | elapsed time per iteration (s): 0.30 | learning rate: 1.181E-04 | global batch size: 256 | lm loss: 3.194658E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.288 | TFLOPs: 30.08 | +7: iteration 10280/ 21553 | consumed samples: 2631680 | consumed tokens: 5389680640 | elapsed time per iteration (s): 0.30 | learning rate: 1.180E-04 | global batch size: 256 | lm loss: 3.184906E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.982 | TFLOPs: 30.32 | +7: iteration 10290/ 21553 | consumed samples: 2634240 | consumed tokens: 5394923520 | elapsed time per iteration (s): 0.30 
| learning rate: 1.179E-04 | global batch size: 256 | lm loss: 3.212188E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.024 | TFLOPs: 30.32 | +7: iteration 10300/ 21553 | consumed samples: 2636800 | consumed tokens: 5400166400 | elapsed time per iteration (s): 0.30 | learning rate: 1.177E-04 | global batch size: 256 | lm loss: 3.209999E+00 | grad norm: 0.398 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.432 | TFLOPs: 30.33 | +7: iteration 10310/ 21553 | consumed samples: 2639360 | consumed tokens: 5405409280 | elapsed time per iteration (s): 0.30 | learning rate: 1.176E-04 | global batch size: 256 | lm loss: 3.200993E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.204 | TFLOPs: 30.32 | +7: iteration 10320/ 21553 | consumed samples: 2641920 | consumed tokens: 5410652160 | elapsed time per iteration (s): 0.30 | learning rate: 1.175E-04 | global batch size: 256 | lm loss: 3.192042E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.025 | TFLOPs: 30.32 | +7: iteration 10330/ 21553 | consumed samples: 2644480 | consumed tokens: 5415895040 | elapsed time per iteration (s): 0.30 | learning rate: 1.173E-04 | global batch size: 256 | lm loss: 3.202364E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.112 | TFLOPs: 30.32 | +7: iteration 10340/ 21553 | consumed samples: 2647040 | consumed tokens: 5421137920 | elapsed time per iteration (s): 0.30 | learning rate: 1.172E-04 | global batch size: 256 | lm loss: 3.198510E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.410 | TFLOPs: 30.33 | +7: iteration 10350/ 21553 | 
consumed samples: 2649600 | consumed tokens: 5426380800 | elapsed time per iteration (s): 0.30 | learning rate: 1.171E-04 | global batch size: 256 | lm loss: 3.192077E+00 | grad norm: 0.400 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.074 | TFLOPs: 30.32 | +7: iteration 10360/ 21553 | consumed samples: 2652160 | consumed tokens: 5431623680 | elapsed time per iteration (s): 0.30 | learning rate: 1.169E-04 | global batch size: 256 | lm loss: 3.199744E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.240 | TFLOPs: 30.32 | +7: iteration 10370/ 21553 | consumed samples: 2654720 | consumed tokens: 5436866560 | elapsed time per iteration (s): 0.30 | learning rate: 1.168E-04 | global batch size: 256 | lm loss: 3.201410E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.355 | TFLOPs: 30.33 | +7: iteration 10380/ 21553 | consumed samples: 2657280 | consumed tokens: 5442109440 | elapsed time per iteration (s): 0.30 | learning rate: 1.167E-04 | global batch size: 256 | lm loss: 3.189254E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.450 | TFLOPs: 30.33 | +7: iteration 10390/ 21553 | consumed samples: 2659840 | consumed tokens: 5447352320 | elapsed time per iteration (s): 0.30 | learning rate: 1.165E-04 | global batch size: 256 | lm loss: 3.183415E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.453 | TFLOPs: 30.33 | +7: iteration 10400/ 21553 | consumed samples: 2662400 | consumed tokens: 5452595200 | elapsed time per iteration (s): 0.30 | learning rate: 1.164E-04 | global batch size: 256 | lm loss: 3.196588E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 866.199 | TFLOPs: 30.32 | +7: iteration 10410/ 21553 | consumed samples: 2664960 | consumed tokens: 5457838080 | elapsed time per iteration (s): 0.30 | learning rate: 1.163E-04 | global batch size: 256 | lm loss: 3.197598E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.561 | TFLOPs: 30.34 | +7: iteration 10420/ 21553 | consumed samples: 2667520 | consumed tokens: 5463080960 | elapsed time per iteration (s): 0.30 | learning rate: 1.161E-04 | global batch size: 256 | lm loss: 3.185423E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.253 | TFLOPs: 30.33 | +7: iteration 10430/ 21553 | consumed samples: 2670080 | consumed tokens: 5468323840 | elapsed time per iteration (s): 0.30 | learning rate: 1.160E-04 | global batch size: 256 | lm loss: 3.201142E+00 | grad norm: 0.405 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.515 | TFLOPs: 30.33 | +7: iteration 10440/ 21553 | consumed samples: 2672640 | consumed tokens: 5473566720 | elapsed time per iteration (s): 0.30 | learning rate: 1.159E-04 | global batch size: 256 | lm loss: 3.194732E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.711 | TFLOPs: 30.34 | +7: iteration 10450/ 21553 | consumed samples: 2675200 | consumed tokens: 5478809600 | elapsed time per iteration (s): 0.30 | learning rate: 1.158E-04 | global batch size: 256 | lm loss: 3.199726E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.797 | TFLOPs: 30.34 | +7: iteration 10460/ 21553 | consumed samples: 2677760 | consumed tokens: 5484052480 | elapsed time per iteration (s): 0.30 | learning rate: 1.156E-04 | global batch size: 256 | lm loss: 
3.188984E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.837 | TFLOPs: 30.35 | +7: iteration 10470/ 21553 | consumed samples: 2680320 | consumed tokens: 5489295360 | elapsed time per iteration (s): 0.30 | learning rate: 1.155E-04 | global batch size: 256 | lm loss: 3.194963E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.123 | TFLOPs: 30.36 | +7: iteration 10480/ 21553 | consumed samples: 2682880 | consumed tokens: 5494538240 | elapsed time per iteration (s): 0.30 | learning rate: 1.154E-04 | global batch size: 256 | lm loss: 3.185138E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.032 | TFLOPs: 30.35 | +7: iteration 10490/ 21553 | consumed samples: 2685440 | consumed tokens: 5499781120 | elapsed time per iteration (s): 0.30 | learning rate: 1.152E-04 | global batch size: 256 | lm loss: 3.190103E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.980 | TFLOPs: 30.35 | +7: iteration 10500/ 21553 | consumed samples: 2688000 | consumed tokens: 5505024000 | elapsed time per iteration (s): 0.30 | learning rate: 1.151E-04 | global batch size: 256 | lm loss: 3.207728E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.609 | TFLOPs: 30.34 | +7: iteration 10510/ 21553 | consumed samples: 2690560 | consumed tokens: 5510266880 | elapsed time per iteration (s): 0.30 | learning rate: 1.150E-04 | global batch size: 256 | lm loss: 3.203032E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.824 | TFLOPs: 30.35 | +7: iteration 10520/ 21553 | consumed samples: 2693120 | consumed tokens: 5515509760 | elapsed 
time per iteration (s): 0.30 | learning rate: 1.148E-04 | global batch size: 256 | lm loss: 3.187860E+00 | grad norm: 0.471 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.671 | TFLOPs: 30.34 | +7: iteration 10530/ 21553 | consumed samples: 2695680 | consumed tokens: 5520752640 | elapsed time per iteration (s): 0.30 | learning rate: 1.147E-04 | global batch size: 256 | lm loss: 3.199875E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.175 | TFLOPs: 30.32 | +7: iteration 10540/ 21553 | consumed samples: 2698240 | consumed tokens: 5525995520 | elapsed time per iteration (s): 0.30 | learning rate: 1.146E-04 | global batch size: 256 | lm loss: 3.196730E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.331 | TFLOPs: 30.33 | +7: iteration 10550/ 21553 | consumed samples: 2700800 | consumed tokens: 5531238400 | elapsed time per iteration (s): 0.30 | learning rate: 1.144E-04 | global batch size: 256 | lm loss: 3.190950E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.893 | TFLOPs: 30.35 | +7: iteration 10560/ 21553 | consumed samples: 2703360 | consumed tokens: 5536481280 | elapsed time per iteration (s): 0.30 | learning rate: 1.143E-04 | global batch size: 256 | lm loss: 3.164150E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.685 | TFLOPs: 30.34 | +7: iteration 10570/ 21553 | consumed samples: 2705920 | consumed tokens: 5541724160 | elapsed time per iteration (s): 0.30 | learning rate: 1.142E-04 | global batch size: 256 | lm loss: 3.187738E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.921 | TFLOPs: 30.35 | +7: 
iteration 10580/ 21553 | consumed samples: 2708480 | consumed tokens: 5546967040 | elapsed time per iteration (s): 0.30 | learning rate: 1.140E-04 | global batch size: 256 | lm loss: 3.190108E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.234 | TFLOPs: 30.32 | +7: iteration 10590/ 21553 | consumed samples: 2711040 | consumed tokens: 5552209920 | elapsed time per iteration (s): 0.30 | learning rate: 1.139E-04 | global batch size: 256 | lm loss: 3.188778E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.061 | TFLOPs: 30.32 | +7: iteration 10600/ 21553 | consumed samples: 2713600 | consumed tokens: 5557452800 | elapsed time per iteration (s): 0.30 | learning rate: 1.138E-04 | global batch size: 256 | lm loss: 3.199319E+00 | grad norm: 0.454 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.058 | TFLOPs: 30.32 | +7: iteration 10610/ 21553 | consumed samples: 2716160 | consumed tokens: 5562695680 | elapsed time per iteration (s): 0.30 | learning rate: 1.136E-04 | global batch size: 256 | lm loss: 3.186539E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.390 | TFLOPs: 30.33 | +7: iteration 10620/ 21553 | consumed samples: 2718720 | consumed tokens: 5567938560 | elapsed time per iteration (s): 0.30 | learning rate: 1.135E-04 | global batch size: 256 | lm loss: 3.193292E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.166 | TFLOPs: 30.32 | +7: iteration 10630/ 21553 | consumed samples: 2721280 | consumed tokens: 5573181440 | elapsed time per iteration (s): 0.30 | learning rate: 1.134E-04 | global batch size: 256 | lm loss: 3.176554E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 866.775 | TFLOPs: 30.34 | +7: iteration 10640/ 21553 | consumed samples: 2723840 | consumed tokens: 5578424320 | elapsed time per iteration (s): 0.30 | learning rate: 1.132E-04 | global batch size: 256 | lm loss: 3.181639E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.338 | TFLOPs: 30.33 | +7: iteration 10650/ 21553 | consumed samples: 2726400 | consumed tokens: 5583667200 | elapsed time per iteration (s): 0.30 | learning rate: 1.131E-04 | global batch size: 256 | lm loss: 3.184310E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.233 | TFLOPs: 30.32 | +7: iteration 10660/ 21553 | consumed samples: 2728960 | consumed tokens: 5588910080 | elapsed time per iteration (s): 0.30 | learning rate: 1.130E-04 | global batch size: 256 | lm loss: 3.198481E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.091 | TFLOPs: 30.32 | +7: iteration 10670/ 21553 | consumed samples: 2731520 | consumed tokens: 5594152960 | elapsed time per iteration (s): 0.30 | learning rate: 1.128E-04 | global batch size: 256 | lm loss: 3.195098E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.429 | TFLOPs: 30.33 | +7: iteration 10680/ 21553 | consumed samples: 2734080 | consumed tokens: 5599395840 | elapsed time per iteration (s): 0.30 | learning rate: 1.127E-04 | global batch size: 256 | lm loss: 3.168513E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.518 | TFLOPs: 30.30 | +7: iteration 10690/ 21553 | consumed samples: 2736640 | consumed tokens: 5604638720 | elapsed time per iteration (s): 0.30 | learning rate: 1.126E-04 | global batch 
size: 256 | lm loss: 3.184194E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.013 | TFLOPs: 30.32 | +7: iteration 10700/ 21553 | consumed samples: 2739200 | consumed tokens: 5609881600 | elapsed time per iteration (s): 0.30 | learning rate: 1.124E-04 | global batch size: 256 | lm loss: 3.178067E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.485 | TFLOPs: 30.33 | +7: iteration 10710/ 21553 | consumed samples: 2741760 | consumed tokens: 5615124480 | elapsed time per iteration (s): 0.30 | learning rate: 1.123E-04 | global batch size: 256 | lm loss: 3.196946E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.051 | TFLOPs: 30.32 | +7: iteration 10720/ 21553 | consumed samples: 2744320 | consumed tokens: 5620367360 | elapsed time per iteration (s): 0.30 | learning rate: 1.122E-04 | global batch size: 256 | lm loss: 3.185332E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.286 | TFLOPs: 30.33 | +7: iteration 10730/ 21553 | consumed samples: 2746880 | consumed tokens: 5625610240 | elapsed time per iteration (s): 0.30 | learning rate: 1.120E-04 | global batch size: 256 | lm loss: 3.177150E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.283 | TFLOPs: 30.33 | +7: iteration 10740/ 21553 | consumed samples: 2749440 | consumed tokens: 5630853120 | elapsed time per iteration (s): 0.30 | learning rate: 1.119E-04 | global batch size: 256 | lm loss: 3.184866E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.069 | TFLOPs: 30.32 | +7: iteration 10750/ 21553 | consumed samples: 2752000 | consumed tokens: 
5636096000 | elapsed time per iteration (s): 0.30 | learning rate: 1.118E-04 | global batch size: 256 | lm loss: 3.178247E+00 | grad norm: 0.464 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.433 | TFLOPs: 30.33 | +7: iteration 10760/ 21553 | consumed samples: 2754560 | consumed tokens: 5641338880 | elapsed time per iteration (s): 0.30 | learning rate: 1.116E-04 | global batch size: 256 | lm loss: 3.176560E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.473 | TFLOPs: 30.33 | +7: iteration 10770/ 21553 | consumed samples: 2757120 | consumed tokens: 5646581760 | elapsed time per iteration (s): 0.30 | learning rate: 1.115E-04 | global batch size: 256 | lm loss: 3.172430E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.836 | TFLOPs: 30.31 | +7: iteration 10780/ 21553 | consumed samples: 2759680 | consumed tokens: 5651824640 | elapsed time per iteration (s): 0.30 | learning rate: 1.114E-04 | global batch size: 256 | lm loss: 3.184198E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.976 | TFLOPs: 30.32 | +7: iteration 10790/ 21553 | consumed samples: 2762240 | consumed tokens: 5657067520 | elapsed time per iteration (s): 0.30 | learning rate: 1.112E-04 | global batch size: 256 | lm loss: 3.180487E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.404 | TFLOPs: 30.33 | +7: iteration 10800/ 21553 | consumed samples: 2764800 | consumed tokens: 5662310400 | elapsed time per iteration (s): 0.30 | learning rate: 1.111E-04 | global batch size: 256 | lm loss: 3.184205E+00 | grad norm: 0.465 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.178 | 
TFLOPs: 30.32 | +7: iteration 10810/ 21553 | consumed samples: 2767360 | consumed tokens: 5667553280 | elapsed time per iteration (s): 0.30 | learning rate: 1.110E-04 | global batch size: 256 | lm loss: 3.175576E+00 | grad norm: 0.466 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.203 | TFLOPs: 30.36 | +7: iteration 10820/ 21553 | consumed samples: 2769920 | consumed tokens: 5672796160 | elapsed time per iteration (s): 0.30 | learning rate: 1.109E-04 | global batch size: 256 | lm loss: 3.193557E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.019 | TFLOPs: 30.35 | +7: iteration 10830/ 21553 | consumed samples: 2772480 | consumed tokens: 5678039040 | elapsed time per iteration (s): 0.30 | learning rate: 1.107E-04 | global batch size: 256 | lm loss: 3.184229E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.425 | TFLOPs: 30.37 | +7: iteration 10840/ 21553 | consumed samples: 2775040 | consumed tokens: 5683281920 | elapsed time per iteration (s): 0.30 | learning rate: 1.106E-04 | global batch size: 256 | lm loss: 3.178323E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.473 | TFLOPs: 30.37 | +7: iteration 10850/ 21553 | consumed samples: 2777600 | consumed tokens: 5688524800 | elapsed time per iteration (s): 0.30 | learning rate: 1.105E-04 | global batch size: 256 | lm loss: 3.178579E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.512 | TFLOPs: 30.37 | +7: iteration 10860/ 21553 | consumed samples: 2780160 | consumed tokens: 5693767680 | elapsed time per iteration (s): 0.30 | learning rate: 1.103E-04 | global batch size: 256 | lm loss: 3.190368E+00 | grad norm: 0.412 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.371 | TFLOPs: 30.36 | +7: iteration 10870/ 21553 | consumed samples: 2782720 | consumed tokens: 5699010560 | elapsed time per iteration (s): 0.30 | learning rate: 1.102E-04 | global batch size: 256 | lm loss: 3.180884E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.396 | TFLOPs: 30.37 | +7: iteration 10880/ 21553 | consumed samples: 2785280 | consumed tokens: 5704253440 | elapsed time per iteration (s): 0.30 | learning rate: 1.101E-04 | global batch size: 256 | lm loss: 3.194453E+00 | grad norm: 0.409 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.561 | TFLOPs: 30.37 | +7: iteration 10890/ 21553 | consumed samples: 2787840 | consumed tokens: 5709496320 | elapsed time per iteration (s): 0.30 | learning rate: 1.099E-04 | global batch size: 256 | lm loss: 3.180427E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.607 | TFLOPs: 30.37 | +7: iteration 10900/ 21553 | consumed samples: 2790400 | consumed tokens: 5714739200 | elapsed time per iteration (s): 0.30 | learning rate: 1.098E-04 | global batch size: 256 | lm loss: 3.179741E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.162 | TFLOPs: 30.36 | +7: iteration 10910/ 21553 | consumed samples: 2792960 | consumed tokens: 5719982080 | elapsed time per iteration (s): 0.30 | learning rate: 1.097E-04 | global batch size: 256 | lm loss: 3.198482E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.887 | TFLOPs: 30.35 | +7: iteration 10920/ 21553 | consumed samples: 2795520 | consumed tokens: 5725224960 | elapsed time per iteration (s): 0.30 | learning rate: 
1.095E-04 | global batch size: 256 | lm loss: 3.166132E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.486 | TFLOPs: 30.37 | +7: iteration 10930/ 21553 | consumed samples: 2798080 | consumed tokens: 5730467840 | elapsed time per iteration (s): 0.30 | learning rate: 1.094E-04 | global batch size: 256 | lm loss: 3.167795E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.951 | TFLOPs: 30.35 | +7: iteration 10940/ 21553 | consumed samples: 2800640 | consumed tokens: 5735710720 | elapsed time per iteration (s): 0.31 | learning rate: 1.093E-04 | global batch size: 256 | lm loss: 3.187412E+00 | grad norm: 0.406 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.881 | TFLOPs: 29.12 | +7: iteration 10950/ 21553 | consumed samples: 2803200 | consumed tokens: 5740953600 | elapsed time per iteration (s): 0.30 | learning rate: 1.091E-04 | global batch size: 256 | lm loss: 3.189277E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.321 | TFLOPs: 30.36 | +7: iteration 10960/ 21553 | consumed samples: 2805760 | consumed tokens: 5746196480 | elapsed time per iteration (s): 0.30 | learning rate: 1.090E-04 | global batch size: 256 | lm loss: 3.179792E+00 | grad norm: 0.456 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.118 | TFLOPs: 30.36 | +7: iteration 10970/ 21553 | consumed samples: 2808320 | consumed tokens: 5751439360 | elapsed time per iteration (s): 0.31 | learning rate: 1.089E-04 | global batch size: 256 | lm loss: 3.179247E+00 | grad norm: 0.409 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.852 | TFLOPs: 29.12 | +7: iteration 10980/ 21553 | consumed samples: 
2810880 | consumed tokens: 5756682240 | elapsed time per iteration (s): 0.31 | learning rate: 1.087E-04 | global batch size: 256 | lm loss: 3.164548E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 820.362 | TFLOPs: 28.72 | +7: iteration 10990/ 21553 | consumed samples: 2813440 | consumed tokens: 5761925120 | elapsed time per iteration (s): 0.30 | learning rate: 1.086E-04 | global batch size: 256 | lm loss: 3.174368E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.421 | TFLOPs: 30.37 | +7: iteration 11000/ 21553 | consumed samples: 2816000 | consumed tokens: 5767168000 | elapsed time per iteration (s): 0.30 | learning rate: 1.085E-04 | global batch size: 256 | lm loss: 3.155588E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.097 | TFLOPs: 30.35 | +7: ------------------------------------------------------------------------------------------------ +7: validation loss at iteration 11000 | lm loss value: 3.812842E+00 | lm loss PPL: 4.527893E+01 | +7: ------------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 11000 to checkpoints_146m14b100m +0: [2023-03-14 00:13:01,914] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step11000 is begin to save! +0: [2023-03-14 00:13:01,917] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_01-model_00-model_states.pt... +0: [2023-03-14 00:13:02,001] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_01-model_00-model_states.pt. +0: [2023-03-14 00:13:02,001] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_03-model_00-model_states.pt... 
+0: [2023-03-14 00:13:02,017] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_03-model_00-model_states.pt. +0: [2023-03-14 00:13:02,017] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_04-model_00-model_states.pt... +0: [2023-03-14 00:13:02,032] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_04-model_00-model_states.pt. +0: [2023-03-14 00:13:02,032] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_05-model_00-model_states.pt... +0: [2023-03-14 00:13:02,047] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_05-model_00-model_states.pt. +0: [2023-03-14 00:13:02,047] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_06-model_00-model_states.pt... +0: [2023-03-14 00:13:02,062] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_06-model_00-model_states.pt. +0: [2023-03-14 00:13:02,062] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_07-model_00-model_states.pt... +0: [2023-03-14 00:13:02,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_07-model_00-model_states.pt. +0: [2023-03-14 00:13:02,077] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_08-model_00-model_states.pt... +0: [2023-03-14 00:13:02,092] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_08-model_00-model_states.pt. +0: [2023-03-14 00:13:02,092] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_09-model_00-model_states.pt... 
+0: [2023-03-14 00:13:02,107] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_09-model_00-model_states.pt. +0: [2023-03-14 00:13:02,107] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_10-model_00-model_states.pt... +0: [2023-03-14 00:13:02,122] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_10-model_00-model_states.pt. +0: [2023-03-14 00:13:02,122] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_11-model_00-model_states.pt... +0: [2023-03-14 00:13:02,137] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_11-model_00-model_states.pt. +0: [2023-03-14 00:13:02,137] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_12-model_00-model_states.pt... +0: [2023-03-14 00:13:02,152] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_12-model_00-model_states.pt. +0: [2023-03-14 00:13:02,152] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_13-model_00-model_states.pt... +0: [2023-03-14 00:13:02,168] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_13-model_00-model_states.pt. +0: [2023-03-14 00:13:02,168] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_14-model_00-model_states.pt... +0: [2023-03-14 00:13:02,183] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_14-model_00-model_states.pt. +0: [2023-03-14 00:13:02,183] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_15-model_00-model_states.pt... 
+0: [2023-03-14 00:13:02,198] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_15-model_00-model_states.pt. +0: [2023-03-14 00:13:02,198] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_16-model_00-model_states.pt... +0: [2023-03-14 00:13:02,213] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_16-model_00-model_states.pt. +0: [2023-03-14 00:13:02,213] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_17-model_00-model_states.pt... +0: [2023-03-14 00:13:02,228] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_17-model_00-model_states.pt. +0: [2023-03-14 00:13:02,228] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/layer_19-model_00-model_states.pt... +0: [2023-03-14 00:13:02,229] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/layer_19-model_00-model_states.pt. +0: [2023-03-14 00:13:02,230] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step11000/mp_rank_00_model_states.pt +0: [2023-03-14 00:13:02,230] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/mp_rank_00_model_states.pt... +0: [2023-03-14 00:13:02,232] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/mp_rank_00_model_states.pt. +0: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... 
+0: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... 
+4: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... 
+3: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... 
+4: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:13:02,248] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:13:02,281] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:13:02,283] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:13:02,283] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-14 00:13:02,283] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! 
+0: [2023-03-14 00:13:02,283] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:13:02,284] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 00:13:02,284] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2023-03-14 00:13:02,283] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:13:02,283] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:13:02,283] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-14 00:13:02,283] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2023-03-14 00:13:02,284] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-14 00:13:02,284] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2023-03-14 00:13:02,284] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:13:02,284] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 00:13:02,284] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! 
+1: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:13:02,285] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +7: [2023-03-14 00:13:02,285] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:13:02,285] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +2: [2023-03-14 00:13:02,285] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +7: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:13:02,285] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +5: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2023-03-14 00:13:02,285] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2023-03-14 00:13:02,286] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 00:13:02,286] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! 
+3: [2023-03-14 00:13:02,285] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:13:02,286] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +5: [2023-03-14 00:13:02,286] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:13:02,286] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2023-03-14 00:13:02,286] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 00:13:02,286] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2023-03-14 00:13:02,286] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:13:02,286] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:13:02,286] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +2: [2023-03-14 00:13:02,286] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +5: [2023-03-14 00:13:02,286] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2023-03-14 00:13:02,286] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! 
+6: [2023-03-14 00:13:02,286] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:13:02,286] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 00:13:02,286] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2023-03-14 00:13:02,286] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:13:02,287] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2023-03-14 00:13:02,287] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +6: [2023-03-14 00:13:02,287] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:13:02,287] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 00:13:02,287] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2023-03-14 00:13:02,287] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:13:02,287] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2023-03-14 00:13:02,287] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! 
+7: [2023-03-14 00:13:02,288] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:13:02,287] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:13:02,288] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +4: [2023-03-14 00:13:02,287] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-14 00:13:02,287] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2023-03-14 00:13:02,288] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:13:02,289] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2023-03-14 00:13:02,288] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-14 00:13:02,288] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2023-03-14 00:13:02,289] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:13:02,289] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-14 00:13:02,289] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! 
+5: [2023-03-14 00:13:02,289] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:13:02,289] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-14 00:13:02,289] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2023-03-14 00:13:02,290] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:13:02,290] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2023-03-14 00:13:02,290] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2023-03-14 00:13:02,290] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:13:02,290] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-14 00:13:02,290] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2023-03-14 00:13:02,290] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:13:02,290] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:13:02,291] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-14 00:13:02,291] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2023-03-14 00:13:02,291] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2023-03-14 00:13:02,291] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2023-03-14 00:13:02,291] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:13:02,291] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-14 00:13:02,291] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2023-03-14 00:13:02,291] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:13:02,291] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 00:13:02,291] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2023-03-14 00:13:02,291] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 00:13:02,291] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-14 00:13:02,291] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2023-03-14 00:13:02,291] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:13:02,291] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:13:02,292] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-14 00:13:02,292] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2023-03-14 00:13:02,292] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-14 00:13:02,292] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:13:02,292] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:13:02,292] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:13:02,292] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:13:02,293] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 00:13:02,293] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +4: [2023-03-14 00:13:02,292] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-14 00:13:02,292] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-14 00:13:02,292] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-14 00:13:02,292] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2023-03-14 00:13:02,292] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2023-03-14 00:13:02,292] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +7: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! 
+6: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:13:02,293] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +2: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:13:02,293] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +4: [2023-03-14 00:13:02,293] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-14 00:13:02,293] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +0: [2023-03-14 00:13:02,293] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +2: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +4: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +6: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:13:02,293] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 00:13:02,293] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +6: [2023-03-14 00:13:02,294] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:13:02,294] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:13:02,294] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:13:02,294] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:13:02,294] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:13:02,294] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 00:13:02,294] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 00:13:02,294] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-14 00:13:02,294] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +0: [2023-03-14 00:13:02,294] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +6: [2023-03-14 00:13:02,294] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +6: [2023-03-14 00:13:02,294] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! 
+0: [2023-03-14 00:13:02,294] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +6: [2023-03-14 00:13:02,294] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +6: [2023-03-14 00:13:02,294] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2023-03-14 00:13:02,296] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:13:02,296] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:13:02,296] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:13:02,296] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:13:02,296] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-14 00:13:02,296] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:13:02,296] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! 
+1: [2023-03-14 00:13:02,296] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 00:13:02,296] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-14 00:13:02,296] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 00:13:02,296] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 00:13:02,296] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2023-03-14 00:13:02,296] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2023-03-14 00:13:02,296] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +1: [2023-03-14 00:13:02,296] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2023-03-14 00:13:02,296] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:13:02,296] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-14 00:13:02,296] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2023-03-14 00:13:02,297] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:13:02,297] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 00:13:02,297] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2023-03-14 00:13:02,297] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:13:02,297] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 00:13:02,297] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +3: [2023-03-14 00:13:02,298] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:13:02,298] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-14 00:13:02,298] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +0: [2023-03-14 00:13:02,302] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 00:13:02,302] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2023-03-14 00:13:02,306] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:13:02,306] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 00:13:02,306] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2023-03-14 00:13:02,312] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:13:02,312] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 00:13:02,312] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2023-03-14 00:13:02,314] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:13:02,315] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 00:13:02,315] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! +5: [2023-03-14 00:13:02,319] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:13:02,319] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step11000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-14 00:13:02,319] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step11000 is ready now! 
+0: successfully saved checkpoint at iteration 11000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 409.93 +7: iteration 11010/ 21553 | consumed samples: 2818560 | consumed tokens: 5772410880 | elapsed time per iteration (s): 0.35 | learning rate: 1.083E-04 | global batch size: 256 | lm loss: 3.157730E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 741.793 | TFLOPs: 25.97 | +7: iteration 11020/ 21553 | consumed samples: 2821120 | consumed tokens: 5777653760 | elapsed time per iteration (s): 0.29 | learning rate: 1.082E-04 | global batch size: 256 | lm loss: 3.161742E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.118 | TFLOPs: 30.39 | +7: iteration 11030/ 21553 | consumed samples: 2823680 | consumed tokens: 5782896640 | elapsed time per iteration (s): 0.30 | learning rate: 1.081E-04 | global batch size: 256 | lm loss: 3.169490E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.295 | TFLOPs: 30.36 | +7: iteration 11040/ 21553 | consumed samples: 2826240 | consumed tokens: 5788139520 | elapsed time per iteration (s): 0.30 | learning rate: 1.079E-04 | global batch size: 256 | lm loss: 3.181167E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.297 | TFLOPs: 30.36 | +7: iteration 11050/ 21553 | consumed samples: 2828800 | consumed tokens: 5793382400 | elapsed time per iteration (s): 0.30 | learning rate: 1.078E-04 | global batch size: 256 | lm loss: 3.172411E+00 | grad norm: 0.505 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.509 | TFLOPs: 30.37 | +7: iteration 11060/ 21553 | consumed samples: 2831360 | consumed tokens: 5798625280 | elapsed time per iteration (s): 0.30 | learning 
rate: 1.077E-04 | global batch size: 256 | lm loss: 3.178645E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.117 | TFLOPs: 30.22 | +7: iteration 11070/ 21553 | consumed samples: 2833920 | consumed tokens: 5803868160 | elapsed time per iteration (s): 0.30 | learning rate: 1.075E-04 | global batch size: 256 | lm loss: 3.171093E+00 | grad norm: 0.389 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.930 | TFLOPs: 30.31 | +7: iteration 11080/ 21553 | consumed samples: 2836480 | consumed tokens: 5809111040 | elapsed time per iteration (s): 0.30 | learning rate: 1.074E-04 | global batch size: 256 | lm loss: 3.179115E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.895 | TFLOPs: 30.31 | +7: iteration 11090/ 21553 | consumed samples: 2839040 | consumed tokens: 5814353920 | elapsed time per iteration (s): 0.30 | learning rate: 1.073E-04 | global batch size: 256 | lm loss: 3.187277E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.046 | TFLOPs: 30.32 | +7: iteration 11100/ 21553 | consumed samples: 2841600 | consumed tokens: 5819596800 | elapsed time per iteration (s): 0.30 | learning rate: 1.071E-04 | global batch size: 256 | lm loss: 3.175785E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.999 | TFLOPs: 30.32 | +7: iteration 11110/ 21553 | consumed samples: 2844160 | consumed tokens: 5824839680 | elapsed time per iteration (s): 0.30 | learning rate: 1.070E-04 | global batch size: 256 | lm loss: 3.172704E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.000 | TFLOPs: 30.32 | +7: iteration 11120/ 21553 | consumed samples: 
2846720 | consumed tokens: 5830082560 | elapsed time per iteration (s): 0.32 | learning rate: 1.069E-04 | global batch size: 256 | lm loss: 3.179169E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 807.881 | TFLOPs: 28.28 | +7: iteration 11130/ 21553 | consumed samples: 2849280 | consumed tokens: 5835325440 | elapsed time per iteration (s): 0.30 | learning rate: 1.067E-04 | global batch size: 256 | lm loss: 3.164671E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.216 | TFLOPs: 30.32 | +7: iteration 11140/ 21553 | consumed samples: 2851840 | consumed tokens: 5840568320 | elapsed time per iteration (s): 0.30 | learning rate: 1.066E-04 | global batch size: 256 | lm loss: 3.168213E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.093 | TFLOPs: 30.32 | +7: iteration 11150/ 21553 | consumed samples: 2854400 | consumed tokens: 5845811200 | elapsed time per iteration (s): 0.30 | learning rate: 1.065E-04 | global batch size: 256 | lm loss: 3.153658E+00 | grad norm: 0.399 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.097 | TFLOPs: 30.32 | +7: iteration 11160/ 21553 | consumed samples: 2856960 | consumed tokens: 5851054080 | elapsed time per iteration (s): 0.30 | learning rate: 1.063E-04 | global batch size: 256 | lm loss: 3.160860E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.054 | TFLOPs: 30.32 | +7: iteration 11170/ 21553 | consumed samples: 2859520 | consumed tokens: 5856296960 | elapsed time per iteration (s): 0.30 | learning rate: 1.062E-04 | global batch size: 256 | lm loss: 3.167270E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | 
samples per second: 865.634 | TFLOPs: 30.30 | +7: iteration 11180/ 21553 | consumed samples: 2862080 | consumed tokens: 5861539840 | elapsed time per iteration (s): 0.30 | learning rate: 1.061E-04 | global batch size: 256 | lm loss: 3.159828E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.318 | TFLOPs: 30.33 | +7: iteration 11190/ 21553 | consumed samples: 2864640 | consumed tokens: 5866782720 | elapsed time per iteration (s): 0.30 | learning rate: 1.060E-04 | global batch size: 256 | lm loss: 3.172008E+00 | grad norm: 0.396 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.929 | TFLOPs: 30.31 | +7: iteration 11200/ 21553 | consumed samples: 2867200 | consumed tokens: 5872025600 | elapsed time per iteration (s): 0.30 | learning rate: 1.058E-04 | global batch size: 256 | lm loss: 3.170145E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.941 | TFLOPs: 30.31 | +7: iteration 11210/ 21553 | consumed samples: 2869760 | consumed tokens: 5877268480 | elapsed time per iteration (s): 0.30 | learning rate: 1.057E-04 | global batch size: 256 | lm loss: 3.179711E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.935 | TFLOPs: 30.28 | +7: iteration 11220/ 21553 | consumed samples: 2872320 | consumed tokens: 5882511360 | elapsed time per iteration (s): 0.30 | learning rate: 1.056E-04 | global batch size: 256 | lm loss: 3.172782E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.365 | TFLOPs: 30.29 | +7: iteration 11230/ 21553 | consumed samples: 2874880 | consumed tokens: 5887754240 | elapsed time per iteration (s): 0.30 | learning rate: 1.054E-04 | global batch size: 256 | lm loss: 3.154761E+00 | grad norm: 
0.405 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.816 | TFLOPs: 30.31 | +7: iteration 11240/ 21553 | consumed samples: 2877440 | consumed tokens: 5892997120 | elapsed time per iteration (s): 0.30 | learning rate: 1.053E-04 | global batch size: 256 | lm loss: 3.177566E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.412 | TFLOPs: 30.30 | +7: iteration 11250/ 21553 | consumed samples: 2880000 | consumed tokens: 5898240000 | elapsed time per iteration (s): 0.30 | learning rate: 1.052E-04 | global batch size: 256 | lm loss: 3.175220E+00 | grad norm: 0.459 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.504 | TFLOPs: 30.30 | +7: iteration 11260/ 21553 | consumed samples: 2882560 | consumed tokens: 5903482880 | elapsed time per iteration (s): 0.30 | learning rate: 1.050E-04 | global batch size: 256 | lm loss: 3.185138E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.656 | TFLOPs: 30.27 | +7: iteration 11270/ 21553 | consumed samples: 2885120 | consumed tokens: 5908725760 | elapsed time per iteration (s): 0.30 | learning rate: 1.049E-04 | global batch size: 256 | lm loss: 3.171073E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.922 | TFLOPs: 30.31 | +7: iteration 11280/ 21553 | consumed samples: 2887680 | consumed tokens: 5913968640 | elapsed time per iteration (s): 0.30 | learning rate: 1.048E-04 | global batch size: 256 | lm loss: 3.164423E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.707 | TFLOPs: 30.31 | +7: iteration 11290/ 21553 | consumed samples: 2890240 | consumed tokens: 5919211520 | elapsed time per iteration (s): 0.30 
| learning rate: 1.046E-04 | global batch size: 256 | lm loss: 3.172631E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.550 | TFLOPs: 30.30 | +7: iteration 11300/ 21553 | consumed samples: 2892800 | consumed tokens: 5924454400 | elapsed time per iteration (s): 0.30 | learning rate: 1.045E-04 | global batch size: 256 | lm loss: 3.172272E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.675 | TFLOPs: 29.46 | +7: iteration 11310/ 21553 | consumed samples: 2895360 | consumed tokens: 5929697280 | elapsed time per iteration (s): 0.30 | learning rate: 1.044E-04 | global batch size: 256 | lm loss: 3.178727E+00 | grad norm: 0.406 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.506 | TFLOPs: 30.33 | +7: iteration 11320/ 21553 | consumed samples: 2897920 | consumed tokens: 5934940160 | elapsed time per iteration (s): 0.30 | learning rate: 1.042E-04 | global batch size: 256 | lm loss: 3.169007E+00 | grad norm: 0.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.309 | TFLOPs: 30.33 | +7: iteration 11330/ 21553 | consumed samples: 2900480 | consumed tokens: 5940183040 | elapsed time per iteration (s): 0.30 | learning rate: 1.041E-04 | global batch size: 256 | lm loss: 3.157420E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.111 | TFLOPs: 30.32 | +7: iteration 11340/ 21553 | consumed samples: 2903040 | consumed tokens: 5945425920 | elapsed time per iteration (s): 0.30 | learning rate: 1.040E-04 | global batch size: 256 | lm loss: 3.164091E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.728 | TFLOPs: 30.31 | +7: iteration 11350/ 21553 | 
consumed samples: 2905600 | consumed tokens: 5950668800 | elapsed time per iteration (s): 0.30 | learning rate: 1.038E-04 | global batch size: 256 | lm loss: 3.165773E+00 | grad norm: 0.398 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.776 | TFLOPs: 30.31 | +7: iteration 11360/ 21553 | consumed samples: 2908160 | consumed tokens: 5955911680 | elapsed time per iteration (s): 0.30 | learning rate: 1.037E-04 | global batch size: 256 | lm loss: 3.161273E+00 | grad norm: 0.406 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.235 | TFLOPs: 30.32 | +7: iteration 11370/ 21553 | consumed samples: 2910720 | consumed tokens: 5961154560 | elapsed time per iteration (s): 0.30 | learning rate: 1.036E-04 | global batch size: 256 | lm loss: 3.163766E+00 | grad norm: 0.454 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.935 | TFLOPs: 30.31 | +7: iteration 11380/ 21553 | consumed samples: 2913280 | consumed tokens: 5966397440 | elapsed time per iteration (s): 0.30 | learning rate: 1.034E-04 | global batch size: 256 | lm loss: 3.152377E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.683 | TFLOPs: 30.31 | +7: iteration 11390/ 21553 | consumed samples: 2915840 | consumed tokens: 5971640320 | elapsed time per iteration (s): 0.30 | learning rate: 1.033E-04 | global batch size: 256 | lm loss: 3.158484E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.879 | TFLOPs: 30.31 | +7: iteration 11400/ 21553 | consumed samples: 2918400 | consumed tokens: 5976883200 | elapsed time per iteration (s): 0.30 | learning rate: 1.032E-04 | global batch size: 256 | lm loss: 3.155828E+00 | grad norm: 0.462 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 866.002 | TFLOPs: 30.32 | +7: iteration 11410/ 21553 | consumed samples: 2920960 | consumed tokens: 5982126080 | elapsed time per iteration (s): 0.30 | learning rate: 1.030E-04 | global batch size: 256 | lm loss: 3.152901E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.315 | TFLOPs: 30.33 | +7: iteration 11420/ 21553 | consumed samples: 2923520 | consumed tokens: 5987368960 | elapsed time per iteration (s): 0.30 | learning rate: 1.029E-04 | global batch size: 256 | lm loss: 3.166925E+00 | grad norm: 0.475 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.728 | TFLOPs: 30.31 | +7: iteration 11430/ 21553 | consumed samples: 2926080 | consumed tokens: 5992611840 | elapsed time per iteration (s): 0.30 | learning rate: 1.028E-04 | global batch size: 256 | lm loss: 3.174046E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.813 | TFLOPs: 30.31 | +7: iteration 11440/ 21553 | consumed samples: 2928640 | consumed tokens: 5997854720 | elapsed time per iteration (s): 0.30 | learning rate: 1.026E-04 | global batch size: 256 | lm loss: 3.157820E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.046 | TFLOPs: 30.32 | +7: iteration 11450/ 21553 | consumed samples: 2931200 | consumed tokens: 6003097600 | elapsed time per iteration (s): 0.30 | learning rate: 1.025E-04 | global batch size: 256 | lm loss: 3.160566E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.982 | TFLOPs: 30.32 | +7: iteration 11460/ 21553 | consumed samples: 2933760 | consumed tokens: 6008340480 | elapsed time per iteration (s): 0.30 | learning rate: 1.024E-04 | global batch size: 256 | lm loss: 
3.167998E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.198 | TFLOPs: 30.32 | +7: iteration 11470/ 21553 | consumed samples: 2936320 | consumed tokens: 6013583360 | elapsed time per iteration (s): 0.30 | learning rate: 1.022E-04 | global batch size: 256 | lm loss: 3.154894E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.993 | TFLOPs: 30.32 | +7: iteration 11480/ 21553 | consumed samples: 2938880 | consumed tokens: 6018826240 | elapsed time per iteration (s): 0.30 | learning rate: 1.021E-04 | global batch size: 256 | lm loss: 3.160963E+00 | grad norm: 0.459 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.070 | TFLOPs: 30.32 | +7: iteration 11490/ 21553 | consumed samples: 2941440 | consumed tokens: 6024069120 | elapsed time per iteration (s): 0.30 | learning rate: 1.020E-04 | global batch size: 256 | lm loss: 3.166253E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.144 | TFLOPs: 30.32 | +7: iteration 11500/ 21553 | consumed samples: 2944000 | consumed tokens: 6029312000 | elapsed time per iteration (s): 0.30 | learning rate: 1.019E-04 | global batch size: 256 | lm loss: 3.154160E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.820 | TFLOPs: 30.31 | +7: iteration 11510/ 21553 | consumed samples: 2946560 | consumed tokens: 6034554880 | elapsed time per iteration (s): 0.30 | learning rate: 1.017E-04 | global batch size: 256 | lm loss: 3.156605E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.094 | TFLOPs: 30.32 | +7: iteration 11520/ 21553 | consumed samples: 2949120 | consumed tokens: 6039797760 | elapsed 
time per iteration (s): 0.30 | learning rate: 1.016E-04 | global batch size: 256 | lm loss: 3.152517E+00 | grad norm: 0.456 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.999 | TFLOPs: 30.32 | +7: iteration 11530/ 21553 | consumed samples: 2951680 | consumed tokens: 6045040640 | elapsed time per iteration (s): 0.30 | learning rate: 1.015E-04 | global batch size: 256 | lm loss: 3.162980E+00 | grad norm: 0.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.864 | TFLOPs: 30.31 | +7: iteration 11540/ 21553 | consumed samples: 2954240 | consumed tokens: 6050283520 | elapsed time per iteration (s): 0.30 | learning rate: 1.013E-04 | global batch size: 256 | lm loss: 3.170317E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.688 | TFLOPs: 30.34 | +7: iteration 11550/ 21553 | consumed samples: 2956800 | consumed tokens: 6055526400 | elapsed time per iteration (s): 0.30 | learning rate: 1.012E-04 | global batch size: 256 | lm loss: 3.170638E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.222 | TFLOPs: 30.32 | +7: iteration 11560/ 21553 | consumed samples: 2959360 | consumed tokens: 6060769280 | elapsed time per iteration (s): 0.30 | learning rate: 1.011E-04 | global batch size: 256 | lm loss: 3.149691E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.214 | TFLOPs: 30.32 | +7: iteration 11570/ 21553 | consumed samples: 2961920 | consumed tokens: 6066012160 | elapsed time per iteration (s): 0.30 | learning rate: 1.009E-04 | global batch size: 256 | lm loss: 3.159054E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.759 | TFLOPs: 30.31 | +7: 
iteration 11580/ 21553 | consumed samples: 2964480 | consumed tokens: 6071255040 | elapsed time per iteration (s): 0.30 | learning rate: 1.008E-04 | global batch size: 256 | lm loss: 3.163881E+00 | grad norm: 0.400 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.729 | TFLOPs: 30.31 | +7: iteration 11590/ 21553 | consumed samples: 2967040 | consumed tokens: 6076497920 | elapsed time per iteration (s): 0.30 | learning rate: 1.007E-04 | global batch size: 256 | lm loss: 3.159824E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.272 | TFLOPs: 30.29 | +7: iteration 11600/ 21553 | consumed samples: 2969600 | consumed tokens: 6081740800 | elapsed time per iteration (s): 0.30 | learning rate: 1.005E-04 | global batch size: 256 | lm loss: 3.151423E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.824 | TFLOPs: 30.31 | +7: iteration 11610/ 21553 | consumed samples: 2972160 | consumed tokens: 6086983680 | elapsed time per iteration (s): 0.30 | learning rate: 1.004E-04 | global batch size: 256 | lm loss: 3.162218E+00 | grad norm: 0.407 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.780 | TFLOPs: 30.31 | +7: iteration 11620/ 21553 | consumed samples: 2974720 | consumed tokens: 6092226560 | elapsed time per iteration (s): 0.30 | learning rate: 1.003E-04 | global batch size: 256 | lm loss: 3.161386E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.132 | TFLOPs: 30.32 | +7: iteration 11630/ 21553 | consumed samples: 2977280 | consumed tokens: 6097469440 | elapsed time per iteration (s): 0.30 | learning rate: 1.001E-04 | global batch size: 256 | lm loss: 3.166646E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 865.692 | TFLOPs: 30.31 | +7: iteration 11640/ 21553 | consumed samples: 2979840 | consumed tokens: 6102712320 | elapsed time per iteration (s): 0.30 | learning rate: 1.000E-04 | global batch size: 256 | lm loss: 3.146698E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.556 | TFLOPs: 30.34 | +7: iteration 11650/ 21553 | consumed samples: 2982400 | consumed tokens: 6107955200 | elapsed time per iteration (s): 0.30 | learning rate: 9.987E-05 | global batch size: 256 | lm loss: 3.152230E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.091 | TFLOPs: 30.32 | +7: iteration 11660/ 21553 | consumed samples: 2984960 | consumed tokens: 6113198080 | elapsed time per iteration (s): 0.30 | learning rate: 9.974E-05 | global batch size: 256 | lm loss: 3.147726E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.098 | TFLOPs: 30.35 | +7: iteration 11670/ 21553 | consumed samples: 2987520 | consumed tokens: 6118440960 | elapsed time per iteration (s): 0.30 | learning rate: 9.961E-05 | global batch size: 256 | lm loss: 3.160035E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.057 | TFLOPs: 30.35 | +7: iteration 11680/ 21553 | consumed samples: 2990080 | consumed tokens: 6123683840 | elapsed time per iteration (s): 0.30 | learning rate: 9.948E-05 | global batch size: 256 | lm loss: 3.152523E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.224 | TFLOPs: 30.36 | +7: iteration 11690/ 21553 | consumed samples: 2992640 | consumed tokens: 6128926720 | elapsed time per iteration (s): 0.30 | learning rate: 9.935E-05 | global batch 
size: 256 | lm loss: 3.148754E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.272 | TFLOPs: 30.36 | +7: iteration 11700/ 21553 | consumed samples: 2995200 | consumed tokens: 6134169600 | elapsed time per iteration (s): 0.30 | learning rate: 9.922E-05 | global batch size: 256 | lm loss: 3.156175E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.102 | TFLOPs: 30.35 | +7: iteration 11710/ 21553 | consumed samples: 2997760 | consumed tokens: 6139412480 | elapsed time per iteration (s): 0.30 | learning rate: 9.909E-05 | global batch size: 256 | lm loss: 3.170702E+00 | grad norm: 0.410 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.990 | TFLOPs: 30.35 | +7: iteration 11720/ 21553 | consumed samples: 3000320 | consumed tokens: 6144655360 | elapsed time per iteration (s): 0.30 | learning rate: 9.895E-05 | global batch size: 256 | lm loss: 3.161667E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.806 | TFLOPs: 30.34 | +7: iteration 11730/ 21553 | consumed samples: 3002880 | consumed tokens: 6149898240 | elapsed time per iteration (s): 0.30 | learning rate: 9.882E-05 | global batch size: 256 | lm loss: 3.150462E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.641 | TFLOPs: 29.53 | +7: iteration 11740/ 21553 | consumed samples: 3005440 | consumed tokens: 6155141120 | elapsed time per iteration (s): 0.30 | learning rate: 9.869E-05 | global batch size: 256 | lm loss: 3.146656E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.988 | TFLOPs: 30.35 | +7: iteration 11750/ 21553 | consumed samples: 3008000 | consumed tokens: 
6160384000 | elapsed time per iteration (s): 0.30 | learning rate: 9.856E-05 | global batch size: 256 | lm loss: 3.168144E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.032 | TFLOPs: 30.35 | +7: iteration 11760/ 21553 | consumed samples: 3010560 | consumed tokens: 6165626880 | elapsed time per iteration (s): 0.30 | learning rate: 9.843E-05 | global batch size: 256 | lm loss: 3.156705E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.968 | TFLOPs: 30.35 | +7: iteration 11770/ 21553 | consumed samples: 3013120 | consumed tokens: 6170869760 | elapsed time per iteration (s): 0.30 | learning rate: 9.830E-05 | global batch size: 256 | lm loss: 3.159735E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.023 | TFLOPs: 30.35 | +7: iteration 11780/ 21553 | consumed samples: 3015680 | consumed tokens: 6176112640 | elapsed time per iteration (s): 0.30 | learning rate: 9.817E-05 | global batch size: 256 | lm loss: 3.145569E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.962 | TFLOPs: 30.35 | +7: iteration 11790/ 21553 | consumed samples: 3018240 | consumed tokens: 6181355520 | elapsed time per iteration (s): 0.30 | learning rate: 9.803E-05 | global batch size: 256 | lm loss: 3.151872E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.747 | TFLOPs: 30.34 | +7: iteration 11800/ 21553 | consumed samples: 3020800 | consumed tokens: 6186598400 | elapsed time per iteration (s): 0.30 | learning rate: 9.790E-05 | global batch size: 256 | lm loss: 3.141490E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.300 | 
TFLOPs: 30.36 | +7: iteration 11810/ 21553 | consumed samples: 3023360 | consumed tokens: 6191841280 | elapsed time per iteration (s): 0.30 | learning rate: 9.777E-05 | global batch size: 256 | lm loss: 3.142869E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.839 | TFLOPs: 30.35 | +7: iteration 11820/ 21553 | consumed samples: 3025920 | consumed tokens: 6197084160 | elapsed time per iteration (s): 0.30 | learning rate: 9.764E-05 | global batch size: 256 | lm loss: 3.150854E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.824 | TFLOPs: 30.35 | +7: iteration 11830/ 21553 | consumed samples: 3028480 | consumed tokens: 6202327040 | elapsed time per iteration (s): 0.30 | learning rate: 9.751E-05 | global batch size: 256 | lm loss: 3.158379E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.821 | TFLOPs: 30.34 | +7: iteration 11840/ 21553 | consumed samples: 3031040 | consumed tokens: 6207569920 | elapsed time per iteration (s): 0.30 | learning rate: 9.738E-05 | global batch size: 256 | lm loss: 3.157224E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.488 | TFLOPs: 30.37 | +7: iteration 11850/ 21553 | consumed samples: 3033600 | consumed tokens: 6212812800 | elapsed time per iteration (s): 0.30 | learning rate: 9.725E-05 | global batch size: 256 | lm loss: 3.151771E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.566 | TFLOPs: 30.37 | +7: iteration 11860/ 21553 | consumed samples: 3036160 | consumed tokens: 6218055680 | elapsed time per iteration (s): 0.30 | learning rate: 9.712E-05 | global batch size: 256 | lm loss: 3.157829E+00 | grad norm: 0.408 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.691 | TFLOPs: 30.34 | +7: iteration 11870/ 21553 | consumed samples: 3038720 | consumed tokens: 6223298560 | elapsed time per iteration (s): 0.30 | learning rate: 9.698E-05 | global batch size: 256 | lm loss: 3.148732E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.189 | TFLOPs: 30.36 | +7: iteration 11880/ 21553 | consumed samples: 3041280 | consumed tokens: 6228541440 | elapsed time per iteration (s): 0.30 | learning rate: 9.685E-05 | global batch size: 256 | lm loss: 3.156075E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.436 | TFLOPs: 30.33 | +7: iteration 11890/ 21553 | consumed samples: 3043840 | consumed tokens: 6233784320 | elapsed time per iteration (s): 0.30 | learning rate: 9.672E-05 | global batch size: 256 | lm loss: 3.154800E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.159 | TFLOPs: 30.32 | +7: iteration 11900/ 21553 | consumed samples: 3046400 | consumed tokens: 6239027200 | elapsed time per iteration (s): 0.30 | learning rate: 9.659E-05 | global batch size: 256 | lm loss: 3.153316E+00 | grad norm: 0.487 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.230 | TFLOPs: 30.22 | +7: iteration 11910/ 21553 | consumed samples: 3048960 | consumed tokens: 6244270080 | elapsed time per iteration (s): 0.30 | learning rate: 9.646E-05 | global batch size: 256 | lm loss: 3.145055E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.736 | TFLOPs: 30.20 | +7: iteration 11920/ 21553 | consumed samples: 3051520 | consumed tokens: 6249512960 | elapsed time per iteration (s): 0.30 | learning rate: 
9.633E-05 | global batch size: 256 | lm loss: 3.153652E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.014 | TFLOPs: 30.35 | +7: iteration 11930/ 21553 | consumed samples: 3054080 | consumed tokens: 6254755840 | elapsed time per iteration (s): 0.30 | learning rate: 9.620E-05 | global batch size: 256 | lm loss: 3.151539E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.947 | TFLOPs: 30.31 | +7: iteration 11940/ 21553 | consumed samples: 3056640 | consumed tokens: 6259998720 | elapsed time per iteration (s): 0.30 | learning rate: 9.607E-05 | global batch size: 256 | lm loss: 3.162433E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.962 | TFLOPs: 29.79 | +7: iteration 11950/ 21553 | consumed samples: 3059200 | consumed tokens: 6265241600 | elapsed time per iteration (s): 0.31 | learning rate: 9.594E-05 | global batch size: 256 | lm loss: 3.140758E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.778 | TFLOPs: 29.19 | +7: iteration 11960/ 21553 | consumed samples: 3061760 | consumed tokens: 6270484480 | elapsed time per iteration (s): 0.30 | learning rate: 9.581E-05 | global batch size: 256 | lm loss: 3.142500E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.488 | TFLOPs: 30.33 | +7: iteration 11970/ 21553 | consumed samples: 3064320 | consumed tokens: 6275727360 | elapsed time per iteration (s): 0.30 | learning rate: 9.567E-05 | global batch size: 256 | lm loss: 3.149581E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.284 | TFLOPs: 30.36 | +7: iteration 11980/ 21553 | consumed samples: 
3066880 | consumed tokens: 6280970240 | elapsed time per iteration (s): 0.30 | learning rate: 9.554E-05 | global batch size: 256 | lm loss: 3.144158E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.751 | TFLOPs: 30.38 | +7: iteration 11990/ 21553 | consumed samples: 3069440 | consumed tokens: 6286213120 | elapsed time per iteration (s): 0.30 | learning rate: 9.541E-05 | global batch size: 256 | lm loss: 3.134571E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.190 | TFLOPs: 30.29 | +0: [2023-03-14 00:17:58,404] [INFO] [logging.py:68:log_dist] [Rank 0] step=12000, skipped=0, lr=[9.528206376265585e-05, 9.528206376265585e-05, 9.528206376265585e-05], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 12000/ 21553 | consumed samples: 3072000 | consumed tokens: 6291456000 | elapsed time per iteration (s): 0.30 | learning rate: 9.528E-05 | global batch size: 256 | lm loss: 3.148695E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.795 | TFLOPs: 30.27 | +0: steps: 12000 loss: 3.1477 iter time (s): 0.294 samples/sec: 870.444 +7: ------------------------------------------------------------------------------------------------ +7: validation loss at iteration 12000 | lm loss value: 3.703534E+00 | lm loss PPL: 4.059050E+01 | +7: ------------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 12000 to checkpoints_146m14b100m +0: [2023-03-14 00:17:58,523] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step12000 is begin to save! +0: [2023-03-14 00:17:58,526] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_01-model_00-model_states.pt... 
+0: [2023-03-14 00:17:58,611] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_01-model_00-model_states.pt. +0: [2023-03-14 00:17:58,611] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_03-model_00-model_states.pt... +0: [2023-03-14 00:17:58,626] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_03-model_00-model_states.pt. +0: [2023-03-14 00:17:58,626] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_04-model_00-model_states.pt... +0: [2023-03-14 00:17:58,641] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_04-model_00-model_states.pt. +0: [2023-03-14 00:17:58,641] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_05-model_00-model_states.pt... +0: [2023-03-14 00:17:58,656] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_05-model_00-model_states.pt. +0: [2023-03-14 00:17:58,656] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_06-model_00-model_states.pt... +0: [2023-03-14 00:17:58,671] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_06-model_00-model_states.pt. +0: [2023-03-14 00:17:58,671] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_07-model_00-model_states.pt... +0: [2023-03-14 00:17:58,686] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_07-model_00-model_states.pt. +0: [2023-03-14 00:17:58,686] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_08-model_00-model_states.pt... 
+0: [2023-03-14 00:17:58,701] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_08-model_00-model_states.pt. +0: [2023-03-14 00:17:58,701] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_09-model_00-model_states.pt... +0: [2023-03-14 00:17:58,715] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_09-model_00-model_states.pt. +0: [2023-03-14 00:17:58,716] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_10-model_00-model_states.pt... +0: [2023-03-14 00:17:58,730] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_10-model_00-model_states.pt. +0: [2023-03-14 00:17:58,731] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_11-model_00-model_states.pt... +0: [2023-03-14 00:17:58,745] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_11-model_00-model_states.pt. +0: [2023-03-14 00:17:58,746] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_12-model_00-model_states.pt... +0: [2023-03-14 00:17:58,760] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_12-model_00-model_states.pt. +0: [2023-03-14 00:17:58,761] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_13-model_00-model_states.pt... +0: [2023-03-14 00:17:58,775] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_13-model_00-model_states.pt. +0: [2023-03-14 00:17:58,776] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_14-model_00-model_states.pt... 
+0: [2023-03-14 00:17:58,790] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_14-model_00-model_states.pt. +0: [2023-03-14 00:17:58,791] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_15-model_00-model_states.pt... +0: [2023-03-14 00:17:58,805] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_15-model_00-model_states.pt. +0: [2023-03-14 00:17:58,806] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_16-model_00-model_states.pt... +0: [2023-03-14 00:17:58,820] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_16-model_00-model_states.pt. +0: [2023-03-14 00:17:58,821] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_17-model_00-model_states.pt... +0: [2023-03-14 00:17:58,836] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_17-model_00-model_states.pt. +0: [2023-03-14 00:17:58,836] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/layer_19-model_00-model_states.pt... +0: [2023-03-14 00:17:58,837] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/layer_19-model_00-model_states.pt. +0: [2023-03-14 00:17:58,838] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step12000/mp_rank_00_model_states.pt +0: [2023-03-14 00:17:58,838] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/mp_rank_00_model_states.pt... +0: [2023-03-14 00:17:58,840] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/mp_rank_00_model_states.pt. 
+0: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:17:58,857] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:17:58,857] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:17:58,857] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... 
+4: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... 
+3: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:17:58,857] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:17:58,857] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:17:58,857] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:17:58,856] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:17:58,888] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:17:58,889] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:17:58,889] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-14 00:17:58,889] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2023-03-14 00:17:58,891] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:17:58,891] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 00:17:58,891] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2023-03-14 00:17:58,891] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:17:58,891] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 00:17:58,891] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2023-03-14 00:17:58,891] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:17:58,891] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-14 00:17:58,891] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2023-03-14 00:17:58,893] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:17:58,893] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +5: [2023-03-14 00:17:58,893] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:17:58,893] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2023-03-14 00:17:58,893] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 00:17:58,893] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2023-03-14 00:17:58,894] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:17:58,894] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 00:17:58,894] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2023-03-14 00:17:58,894] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:17:58,894] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:17:58,894] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +5: [2023-03-14 00:17:58,894] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-14 00:17:58,894] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2023-03-14 00:17:58,894] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2023-03-14 00:17:58,890] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:17:58,892] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:17:58,893] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2023-03-14 00:17:58,893] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2023-03-14 00:17:58,894] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:17:58,894] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-14 00:17:58,894] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2023-03-14 00:17:58,894] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:17:58,895] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 00:17:58,895] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2023-03-14 00:17:58,895] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:17:58,895] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 00:17:58,895] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2023-03-14 00:17:58,895] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:17:58,896] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-14 00:17:58,896] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2023-03-14 00:17:58,896] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:17:58,896] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-14 00:17:58,896] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2023-03-14 00:17:58,897] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:17:58,897] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:17:58,897] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +7: [2023-03-14 00:17:58,897] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +0: [2023-03-14 00:17:58,897] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2023-03-14 00:17:58,897] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2023-03-14 00:17:58,897] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:17:58,897] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 00:17:58,897] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2023-03-14 00:17:58,898] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:17:58,898] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 00:17:58,898] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:17:58,899] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 00:17:58,899] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +1: [2023-03-14 00:17:58,899] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +5: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:17:58,899] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:17:58,899] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2023-03-14 00:17:58,899] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2023-03-14 00:17:58,904] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:17:58,904] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:17:58,904] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 00:17:58,904] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 00:17:58,904] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2023-03-14 00:17:58,904] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2023-03-14 00:17:58,904] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:17:58,904] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:17:58,904] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-14 00:17:58,904] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 00:17:58,904] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +6: [2023-03-14 00:17:58,904] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2023-03-14 00:17:58,907] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:17:58,906] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:17:58,907] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:17:58,907] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 00:17:58,907] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 00:17:58,907] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-14 00:17:58,907] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2023-03-14 00:17:58,907] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +1: [2023-03-14 00:17:58,907] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +5: [2023-03-14 00:17:58,907] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:17:58,908] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-14 00:17:58,908] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2023-03-14 00:17:58,909] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:17:58,909] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-14 00:17:58,909] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! 
+3: [2023-03-14 00:17:58,895] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:17:58,890] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3: [2023-03-14 00:17:58,895] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +4: [2023-03-14 00:17:58,890] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2023-03-14 00:17:58,896] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:17:58,899] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +4: [2023-03-14 00:17:58,893] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 00:17:58,896] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:17:58,899] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +4: [2023-03-14 00:17:58,893] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-14 00:17:58,893] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2023-03-14 00:17:58,896] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +2: [2023-03-14 00:17:58,899] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-14 00:17:58,899] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2023-03-14 00:17:58,894] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:17:58,896] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! 
+2: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2023-03-14 00:17:58,894] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +3: [2023-03-14 00:17:58,898] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:17:58,902] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:17:58,902] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:17:58,894] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2023-03-14 00:17:58,898] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +2: [2023-03-14 00:17:58,902] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2023-03-14 00:17:58,902] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2023-03-14 00:17:58,902] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2023-03-14 00:17:58,900] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:17:58,902] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2023-03-14 00:17:58,900] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +2: [2023-03-14 00:17:58,903] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:17:58,900] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2023-03-14 00:17:58,903] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3: [2023-03-14 00:17:58,901] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:17:58,903] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2023-03-14 00:17:58,902] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +2: [2023-03-14 00:17:58,907] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:17:58,902] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +2: [2023-03-14 00:17:58,907] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3: [2023-03-14 00:17:58,903] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:17:58,907] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +3: [2023-03-14 00:17:58,903] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-14 00:17:58,903] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2023-03-14 00:17:58,896] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:17:58,897] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-14 00:17:58,897] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2023-03-14 00:17:58,899] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:17:58,900] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-14 00:17:58,900] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2023-03-14 00:17:58,900] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:17:58,900] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-14 00:17:58,900] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! 
+4: [2023-03-14 00:17:58,900] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:17:58,900] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:17:58,900] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-14 00:17:58,900] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-14 00:17:58,900] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +4: [2023-03-14 00:17:58,900] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2023-03-14 00:17:58,917] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:17:58,917] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:17:58,917] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +0: [2023-03-14 00:17:58,917] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +7: [2023-03-14 00:17:58,917] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2023-03-14 00:17:58,918] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! 
+0: [2023-03-14 00:17:58,918] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:17:58,918] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 00:17:58,918] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: [2023-03-14 00:17:58,918] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:17:58,918] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 00:17:58,918] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2023-03-14 00:17:58,921] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:17:58,921] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 00:17:58,921] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2023-03-14 00:17:58,921] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:17:58,922] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-14 00:17:58,922] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! 
+0: [2023-03-14 00:17:58,924] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 00:17:58,924] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +7: [2023-03-14 00:17:58,927] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:17:58,927] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step12000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 00:17:58,927] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step12000 is ready now! +0: successfully saved checkpoint at iteration 12000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 407.87 +7: iteration 12010/ 21553 | consumed samples: 3074560 | consumed tokens: 6296698880 | elapsed time per iteration (s): 0.35 | learning rate: 9.515E-05 | global batch size: 256 | lm loss: 3.129955E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 733.806 | TFLOPs: 25.69 | +7: iteration 12020/ 21553 | consumed samples: 3077120 | consumed tokens: 6301941760 | elapsed time per iteration (s): 0.30 | learning rate: 9.502E-05 | global batch size: 256 | lm loss: 3.144282E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.606 | TFLOPs: 29.99 | +7: iteration 12030/ 21553 | consumed samples: 3079680 | consumed tokens: 6307184640 | elapsed time per iteration (s): 0.30 | learning rate: 9.489E-05 | global batch size: 256 | lm loss: 3.128887E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.906 | TFLOPs: 30.35 | +7: iteration 12040/ 
21553 | consumed samples: 3082240 | consumed tokens: 6312427520 | elapsed time per iteration (s): 0.30 | learning rate: 9.476E-05 | global batch size: 256 | lm loss: 3.147118E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.659 | TFLOPs: 30.27 | +7: iteration 12050/ 21553 | consumed samples: 3084800 | consumed tokens: 6317670400 | elapsed time per iteration (s): 0.30 | learning rate: 9.463E-05 | global batch size: 256 | lm loss: 3.148638E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.825 | TFLOPs: 30.28 | +7: iteration 12060/ 21553 | consumed samples: 3087360 | consumed tokens: 6322913280 | elapsed time per iteration (s): 0.30 | learning rate: 9.450E-05 | global batch size: 256 | lm loss: 3.154290E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.009 | TFLOPs: 30.32 | +7: iteration 12070/ 21553 | consumed samples: 3089920 | consumed tokens: 6328156160 | elapsed time per iteration (s): 0.30 | learning rate: 9.437E-05 | global batch size: 256 | lm loss: 3.142284E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.934 | TFLOPs: 30.31 | +7: iteration 12080/ 21553 | consumed samples: 3092480 | consumed tokens: 6333399040 | elapsed time per iteration (s): 0.30 | learning rate: 9.424E-05 | global batch size: 256 | lm loss: 3.150952E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.675 | TFLOPs: 29.85 | +7: iteration 12090/ 21553 | consumed samples: 3095040 | consumed tokens: 6338641920 | elapsed time per iteration (s): 0.30 | learning rate: 9.411E-05 | global batch size: 256 | lm loss: 3.145870E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number 
of nan iterations: 0 | samples per second: 867.524 | TFLOPs: 30.37 | +7: iteration 12100/ 21553 | consumed samples: 3097600 | consumed tokens: 6343884800 | elapsed time per iteration (s): 0.30 | learning rate: 9.398E-05 | global batch size: 256 | lm loss: 3.160147E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.590 | TFLOPs: 30.37 | +7: iteration 12110/ 21553 | consumed samples: 3100160 | consumed tokens: 6349127680 | elapsed time per iteration (s): 0.30 | learning rate: 9.385E-05 | global batch size: 256 | lm loss: 3.152781E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.323 | TFLOPs: 30.36 | +7: iteration 12120/ 21553 | consumed samples: 3102720 | consumed tokens: 6354370560 | elapsed time per iteration (s): 0.30 | learning rate: 9.372E-05 | global batch size: 256 | lm loss: 3.150690E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.411 | TFLOPs: 30.37 | +7: iteration 12130/ 21553 | consumed samples: 3105280 | consumed tokens: 6359613440 | elapsed time per iteration (s): 0.29 | learning rate: 9.359E-05 | global batch size: 256 | lm loss: 3.161104E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.877 | TFLOPs: 30.38 | +7: iteration 12140/ 21553 | consumed samples: 3107840 | consumed tokens: 6364856320 | elapsed time per iteration (s): 0.30 | learning rate: 9.346E-05 | global batch size: 256 | lm loss: 3.156386E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.224 | TFLOPs: 30.36 | +7: iteration 12150/ 21553 | consumed samples: 3110400 | consumed tokens: 6370099200 | elapsed time per iteration (s): 0.30 | learning rate: 9.332E-05 | global batch size: 256 | lm loss: 
3.141418E+00 | grad norm: 0.460 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.345 | TFLOPs: 30.36 | +7: iteration 12160/ 21553 | consumed samples: 3112960 | consumed tokens: 6375342080 | elapsed time per iteration (s): 0.29 | learning rate: 9.319E-05 | global batch size: 256 | lm loss: 3.135134E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.901 | TFLOPs: 30.38 | +7: iteration 12170/ 21553 | consumed samples: 3115520 | consumed tokens: 6380584960 | elapsed time per iteration (s): 0.29 | learning rate: 9.306E-05 | global batch size: 256 | lm loss: 3.147464E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.893 | TFLOPs: 30.38 | +7: iteration 12180/ 21553 | consumed samples: 3118080 | consumed tokens: 6385827840 | elapsed time per iteration (s): 0.30 | learning rate: 9.293E-05 | global batch size: 256 | lm loss: 3.147990E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.763 | TFLOPs: 30.38 | +7: iteration 12190/ 21553 | consumed samples: 3120640 | consumed tokens: 6391070720 | elapsed time per iteration (s): 0.29 | learning rate: 9.280E-05 | global batch size: 256 | lm loss: 3.140443E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.801 | TFLOPs: 30.38 | +7: iteration 12200/ 21553 | consumed samples: 3123200 | consumed tokens: 6396313600 | elapsed time per iteration (s): 0.30 | learning rate: 9.267E-05 | global batch size: 256 | lm loss: 3.138022E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.710 | TFLOPs: 30.38 | +7: iteration 12210/ 21553 | consumed samples: 3125760 | consumed tokens: 6401556480 | elapsed 
time per iteration (s): 0.29 | learning rate: 9.254E-05 | global batch size: 256 | lm loss: 3.151898E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.903 | TFLOPs: 30.38 | +7: iteration 12220/ 21553 | consumed samples: 3128320 | consumed tokens: 6406799360 | elapsed time per iteration (s): 0.30 | learning rate: 9.241E-05 | global batch size: 256 | lm loss: 3.158717E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.768 | TFLOPs: 30.38 | +7: iteration 12230/ 21553 | consumed samples: 3130880 | consumed tokens: 6412042240 | elapsed time per iteration (s): 0.29 | learning rate: 9.228E-05 | global batch size: 256 | lm loss: 3.138061E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.908 | TFLOPs: 30.38 | +7: iteration 12240/ 21553 | consumed samples: 3133440 | consumed tokens: 6417285120 | elapsed time per iteration (s): 0.30 | learning rate: 9.215E-05 | global batch size: 256 | lm loss: 3.147971E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.712 | TFLOPs: 30.38 | +7: iteration 12250/ 21553 | consumed samples: 3136000 | consumed tokens: 6422528000 | elapsed time per iteration (s): 0.29 | learning rate: 9.202E-05 | global batch size: 256 | lm loss: 3.139772E+00 | grad norm: 0.480 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.810 | TFLOPs: 30.38 | +7: iteration 12260/ 21553 | consumed samples: 3138560 | consumed tokens: 6427770880 | elapsed time per iteration (s): 0.30 | learning rate: 9.189E-05 | global batch size: 256 | lm loss: 3.138602E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.638 | TFLOPs: 30.37 | +7: 
iteration 12270/ 21553 | consumed samples: 3141120 | consumed tokens: 6433013760 | elapsed time per iteration (s): 0.30 | learning rate: 9.177E-05 | global batch size: 256 | lm loss: 3.134571E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.023 | TFLOPs: 29.72 | +7: iteration 12280/ 21553 | consumed samples: 3143680 | consumed tokens: 6438256640 | elapsed time per iteration (s): 0.30 | learning rate: 9.164E-05 | global batch size: 256 | lm loss: 3.127726E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.564 | TFLOPs: 30.37 | +7: iteration 12290/ 21553 | consumed samples: 3146240 | consumed tokens: 6443499520 | elapsed time per iteration (s): 0.29 | learning rate: 9.151E-05 | global batch size: 256 | lm loss: 3.136577E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.151 | TFLOPs: 30.39 | +7: iteration 12300/ 21553 | consumed samples: 3148800 | consumed tokens: 6448742400 | elapsed time per iteration (s): 0.30 | learning rate: 9.138E-05 | global batch size: 256 | lm loss: 3.130323E+00 | grad norm: 0.460 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.674 | TFLOPs: 30.37 | +7: iteration 12310/ 21553 | consumed samples: 3151360 | consumed tokens: 6453985280 | elapsed time per iteration (s): 0.30 | learning rate: 9.125E-05 | global batch size: 256 | lm loss: 3.136502E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.567 | TFLOPs: 30.37 | +7: iteration 12320/ 21553 | consumed samples: 3153920 | consumed tokens: 6459228160 | elapsed time per iteration (s): 0.29 | learning rate: 9.112E-05 | global batch size: 256 | lm loss: 3.136997E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 867.878 | TFLOPs: 30.38 | +7: iteration 12330/ 21553 | consumed samples: 3156480 | consumed tokens: 6464471040 | elapsed time per iteration (s): 0.30 | learning rate: 9.099E-05 | global batch size: 256 | lm loss: 3.133354E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.696 | TFLOPs: 30.38 | +7: iteration 12340/ 21553 | consumed samples: 3159040 | consumed tokens: 6469713920 | elapsed time per iteration (s): 0.29 | learning rate: 9.086E-05 | global batch size: 256 | lm loss: 3.137918E+00 | grad norm: 0.403 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.031 | TFLOPs: 30.39 | +7: iteration 12350/ 21553 | consumed samples: 3161600 | consumed tokens: 6474956800 | elapsed time per iteration (s): 0.30 | learning rate: 9.073E-05 | global batch size: 256 | lm loss: 3.131362E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.946 | TFLOPs: 30.35 | +7: iteration 12360/ 21553 | consumed samples: 3164160 | consumed tokens: 6480199680 | elapsed time per iteration (s): 0.30 | learning rate: 9.060E-05 | global batch size: 256 | lm loss: 3.135370E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.587 | TFLOPs: 30.37 | +7: iteration 12370/ 21553 | consumed samples: 3166720 | consumed tokens: 6485442560 | elapsed time per iteration (s): 0.30 | learning rate: 9.047E-05 | global batch size: 256 | lm loss: 3.137044E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.495 | TFLOPs: 30.37 | +7: iteration 12380/ 21553 | consumed samples: 3169280 | consumed tokens: 6490685440 | elapsed time per iteration (s): 0.30 | learning rate: 9.034E-05 | global batch 
size: 256 | lm loss: 3.128093E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.666 | TFLOPs: 30.37 | +7: iteration 12390/ 21553 | consumed samples: 3171840 | consumed tokens: 6495928320 | elapsed time per iteration (s): 0.30 | learning rate: 9.021E-05 | global batch size: 256 | lm loss: 3.147587E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.796 | TFLOPs: 30.38 | +7: iteration 12400/ 21553 | consumed samples: 3174400 | consumed tokens: 6501171200 | elapsed time per iteration (s): 0.30 | learning rate: 9.008E-05 | global batch size: 256 | lm loss: 3.136713E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.631 | TFLOPs: 30.37 | +7: iteration 12410/ 21553 | consumed samples: 3176960 | consumed tokens: 6506414080 | elapsed time per iteration (s): 0.30 | learning rate: 8.995E-05 | global batch size: 256 | lm loss: 3.130922E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.316 | TFLOPs: 30.36 | +7: iteration 12420/ 21553 | consumed samples: 3179520 | consumed tokens: 6511656960 | elapsed time per iteration (s): 0.30 | learning rate: 8.982E-05 | global batch size: 256 | lm loss: 3.138860E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.542 | TFLOPs: 30.30 | +7: iteration 12430/ 21553 | consumed samples: 3182080 | consumed tokens: 6516899840 | elapsed time per iteration (s): 0.30 | learning rate: 8.969E-05 | global batch size: 256 | lm loss: 3.135616E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.790 | TFLOPs: 30.06 | +7: iteration 12440/ 21553 | consumed samples: 3184640 | consumed tokens: 
6522142720 | elapsed time per iteration (s): 0.29 | learning rate: 8.957E-05 | global batch size: 256 | lm loss: 3.144833E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.099 | TFLOPs: 30.39 | +7: iteration 12450/ 21553 | consumed samples: 3187200 | consumed tokens: 6527385600 | elapsed time per iteration (s): 0.30 | learning rate: 8.944E-05 | global batch size: 256 | lm loss: 3.128932E+00 | grad norm: 0.398 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.558 | TFLOPs: 30.37 | +7: iteration 12460/ 21553 | consumed samples: 3189760 | consumed tokens: 6532628480 | elapsed time per iteration (s): 0.30 | learning rate: 8.931E-05 | global batch size: 256 | lm loss: 3.127082E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.764 | TFLOPs: 30.38 | +7: iteration 12470/ 21553 | consumed samples: 3192320 | consumed tokens: 6537871360 | elapsed time per iteration (s): 0.29 | learning rate: 8.918E-05 | global batch size: 256 | lm loss: 3.136837E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.798 | TFLOPs: 30.38 | +7: iteration 12480/ 21553 | consumed samples: 3194880 | consumed tokens: 6543114240 | elapsed time per iteration (s): 0.29 | learning rate: 8.905E-05 | global batch size: 256 | lm loss: 3.128073E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.943 | TFLOPs: 30.38 | +7: iteration 12490/ 21553 | consumed samples: 3197440 | consumed tokens: 6548357120 | elapsed time per iteration (s): 0.29 | learning rate: 8.892E-05 | global batch size: 256 | lm loss: 3.136391E+00 | grad norm: 0.470 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.022 | 
TFLOPs: 30.39 | +7: iteration 12500/ 21553 | consumed samples: 3200000 | consumed tokens: 6553600000 | elapsed time per iteration (s): 0.30 | learning rate: 8.879E-05 | global batch size: 256 | lm loss: 3.143279E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.775 | TFLOPs: 30.38 | +7: iteration 12510/ 21553 | consumed samples: 3202560 | consumed tokens: 6558842880 | elapsed time per iteration (s): 0.29 | learning rate: 8.866E-05 | global batch size: 256 | lm loss: 3.131344E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.425 | TFLOPs: 30.40 | +7: iteration 12520/ 21553 | consumed samples: 3205120 | consumed tokens: 6564085760 | elapsed time per iteration (s): 0.29 | learning rate: 8.853E-05 | global batch size: 256 | lm loss: 3.129259E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.214 | TFLOPs: 30.39 | +7: iteration 12530/ 21553 | consumed samples: 3207680 | consumed tokens: 6569328640 | elapsed time per iteration (s): 0.29 | learning rate: 8.841E-05 | global batch size: 256 | lm loss: 3.143793E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.076 | TFLOPs: 30.39 | +7: iteration 12540/ 21553 | consumed samples: 3210240 | consumed tokens: 6574571520 | elapsed time per iteration (s): 0.29 | learning rate: 8.828E-05 | global batch size: 256 | lm loss: 3.141154E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.070 | TFLOPs: 30.39 | +7: iteration 12550/ 21553 | consumed samples: 3212800 | consumed tokens: 6579814400 | elapsed time per iteration (s): 0.29 | learning rate: 8.815E-05 | global batch size: 256 | lm loss: 3.132311E+00 | grad norm: 0.439 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.983 | TFLOPs: 30.39 | +7: iteration 12560/ 21553 | consumed samples: 3215360 | consumed tokens: 6585057280 | elapsed time per iteration (s): 0.30 | learning rate: 8.802E-05 | global batch size: 256 | lm loss: 3.126809E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.456 | TFLOPs: 30.37 | +7: iteration 12570/ 21553 | consumed samples: 3217920 | consumed tokens: 6590300160 | elapsed time per iteration (s): 0.30 | learning rate: 8.789E-05 | global batch size: 256 | lm loss: 3.132348E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.613 | TFLOPs: 30.37 | +7: iteration 12580/ 21553 | consumed samples: 3220480 | consumed tokens: 6595543040 | elapsed time per iteration (s): 0.30 | learning rate: 8.776E-05 | global batch size: 256 | lm loss: 3.127788E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.633 | TFLOPs: 30.37 | +7: iteration 12590/ 21553 | consumed samples: 3223040 | consumed tokens: 6600785920 | elapsed time per iteration (s): 0.30 | learning rate: 8.763E-05 | global batch size: 256 | lm loss: 3.119287E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.540 | TFLOPs: 30.37 | +7: iteration 12600/ 21553 | consumed samples: 3225600 | consumed tokens: 6606028800 | elapsed time per iteration (s): 0.30 | learning rate: 8.751E-05 | global batch size: 256 | lm loss: 3.142310E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.632 | TFLOPs: 30.37 | +7: iteration 12610/ 21553 | consumed samples: 3228160 | consumed tokens: 6611271680 | elapsed time per iteration (s): 0.30 | learning rate: 
8.738E-05 | global batch size: 256 | lm loss: 3.122646E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.364 | TFLOPs: 30.36 | +7: iteration 12620/ 21553 | consumed samples: 3230720 | consumed tokens: 6616514560 | elapsed time per iteration (s): 0.30 | learning rate: 8.725E-05 | global batch size: 256 | lm loss: 3.128092E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.566 | TFLOPs: 30.37 | +7: iteration 12630/ 21553 | consumed samples: 3233280 | consumed tokens: 6621757440 | elapsed time per iteration (s): 0.30 | learning rate: 8.712E-05 | global batch size: 256 | lm loss: 3.142537E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.019 | TFLOPs: 30.35 | +7: iteration 12640/ 21553 | consumed samples: 3235840 | consumed tokens: 6627000320 | elapsed time per iteration (s): 0.30 | learning rate: 8.699E-05 | global batch size: 256 | lm loss: 3.136421E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.976 | TFLOPs: 30.35 | +7: iteration 12650/ 21553 | consumed samples: 3238400 | consumed tokens: 6632243200 | elapsed time per iteration (s): 0.30 | learning rate: 8.687E-05 | global batch size: 256 | lm loss: 3.120798E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.062 | TFLOPs: 30.35 | +7: iteration 12660/ 21553 | consumed samples: 3240960 | consumed tokens: 6637486080 | elapsed time per iteration (s): 0.30 | learning rate: 8.674E-05 | global batch size: 256 | lm loss: 3.135810E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.956 | TFLOPs: 30.35 | +7: iteration 12670/ 21553 | consumed samples: 
3243520 | consumed tokens: 6642728960 | elapsed time per iteration (s): 0.30 | learning rate: 8.661E-05 | global batch size: 256 | lm loss: 3.121835E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.613 | TFLOPs: 30.34 | +7: iteration 12680/ 21553 | consumed samples: 3246080 | consumed tokens: 6647971840 | elapsed time per iteration (s): 0.30 | learning rate: 8.648E-05 | global batch size: 256 | lm loss: 3.138918E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.038 | TFLOPs: 30.35 | +7: iteration 12690/ 21553 | consumed samples: 3248640 | consumed tokens: 6653214720 | elapsed time per iteration (s): 0.30 | learning rate: 8.635E-05 | global batch size: 256 | lm loss: 3.134372E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.823 | TFLOPs: 30.35 | +7: iteration 12700/ 21553 | consumed samples: 3251200 | consumed tokens: 6658457600 | elapsed time per iteration (s): 0.30 | learning rate: 8.623E-05 | global batch size: 256 | lm loss: 3.125204E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.225 | TFLOPs: 30.32 | +7: iteration 12710/ 21553 | consumed samples: 3253760 | consumed tokens: 6663700480 | elapsed time per iteration (s): 0.30 | learning rate: 8.610E-05 | global batch size: 256 | lm loss: 3.111779E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.720 | TFLOPs: 30.34 | +7: iteration 12720/ 21553 | consumed samples: 3256320 | consumed tokens: 6668943360 | elapsed time per iteration (s): 0.30 | learning rate: 8.597E-05 | global batch size: 256 | lm loss: 3.123969E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | 
samples per second: 867.131 | TFLOPs: 30.36 | +7: iteration 12730/ 21553 | consumed samples: 3258880 | consumed tokens: 6674186240 | elapsed time per iteration (s): 0.30 | learning rate: 8.584E-05 | global batch size: 256 | lm loss: 3.129477E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.823 | TFLOPs: 30.35 | +7: iteration 12740/ 21553 | consumed samples: 3261440 | consumed tokens: 6679429120 | elapsed time per iteration (s): 0.30 | learning rate: 8.571E-05 | global batch size: 256 | lm loss: 3.125489E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.983 | TFLOPs: 30.35 | +7: iteration 12750/ 21553 | consumed samples: 3264000 | consumed tokens: 6684672000 | elapsed time per iteration (s): 0.30 | learning rate: 8.559E-05 | global batch size: 256 | lm loss: 3.130894E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.727 | TFLOPs: 30.34 | +7: iteration 12760/ 21553 | consumed samples: 3266560 | consumed tokens: 6689914880 | elapsed time per iteration (s): 0.30 | learning rate: 8.546E-05 | global batch size: 256 | lm loss: 3.122475E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.245 | TFLOPs: 30.15 | +7: iteration 12770/ 21553 | consumed samples: 3269120 | consumed tokens: 6695157760 | elapsed time per iteration (s): 0.33 | learning rate: 8.533E-05 | global batch size: 256 | lm loss: 3.127545E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 783.034 | TFLOPs: 27.41 | +7: iteration 12780/ 21553 | consumed samples: 3271680 | consumed tokens: 6700400640 | elapsed time per iteration (s): 0.30 | learning rate: 8.520E-05 | global batch size: 256 | lm loss: 3.125103E+00 | grad norm: 
0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.483 | TFLOPs: 29.84 | +7: iteration 12790/ 21553 | consumed samples: 3274240 | consumed tokens: 6705643520 | elapsed time per iteration (s): 0.30 | learning rate: 8.508E-05 | global batch size: 256 | lm loss: 3.124576E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 839.532 | TFLOPs: 29.39 | +7: iteration 12800/ 21553 | consumed samples: 3276800 | consumed tokens: 6710886400 | elapsed time per iteration (s): 0.30 | learning rate: 8.495E-05 | global batch size: 256 | lm loss: 3.142001E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.124 | TFLOPs: 30.11 | +7: iteration 12810/ 21553 | consumed samples: 3279360 | consumed tokens: 6716129280 | elapsed time per iteration (s): 0.30 | learning rate: 8.482E-05 | global batch size: 256 | lm loss: 3.126734E+00 | grad norm: 0.406 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.740 | TFLOPs: 30.03 | +7: iteration 12820/ 21553 | consumed samples: 3281920 | consumed tokens: 6721372160 | elapsed time per iteration (s): 0.30 | learning rate: 8.470E-05 | global batch size: 256 | lm loss: 3.126407E+00 | grad norm: 0.409 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.306 | TFLOPs: 30.29 | +7: iteration 12830/ 21553 | consumed samples: 3284480 | consumed tokens: 6726615040 | elapsed time per iteration (s): 0.30 | learning rate: 8.457E-05 | global batch size: 256 | lm loss: 3.105674E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.265 | TFLOPs: 30.12 | +7: iteration 12840/ 21553 | consumed samples: 3287040 | consumed tokens: 6731857920 | elapsed time per iteration (s): 0.30 
| learning rate: 8.444E-05 | global batch size: 256 | lm loss: 3.130755E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.478 | TFLOPs: 30.05 | +7: iteration 12850/ 21553 | consumed samples: 3289600 | consumed tokens: 6737100800 | elapsed time per iteration (s): 0.30 | learning rate: 8.431E-05 | global batch size: 256 | lm loss: 3.125647E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.540 | TFLOPs: 30.30 | +7: iteration 12860/ 21553 | consumed samples: 3292160 | consumed tokens: 6742343680 | elapsed time per iteration (s): 0.30 | learning rate: 8.419E-05 | global batch size: 256 | lm loss: 3.129032E+00 | grad norm: 0.459 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.332 | TFLOPs: 30.33 | +7: iteration 12870/ 21553 | consumed samples: 3294720 | consumed tokens: 6747586560 | elapsed time per iteration (s): 0.30 | learning rate: 8.406E-05 | global batch size: 256 | lm loss: 3.139147E+00 | grad norm: 0.463 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.914 | TFLOPs: 30.31 | +7: iteration 12880/ 21553 | consumed samples: 3297280 | consumed tokens: 6752829440 | elapsed time per iteration (s): 0.30 | learning rate: 8.393E-05 | global batch size: 256 | lm loss: 3.136837E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.859 | TFLOPs: 30.21 | +7: iteration 12890/ 21553 | consumed samples: 3299840 | consumed tokens: 6758072320 | elapsed time per iteration (s): 0.30 | learning rate: 8.381E-05 | global batch size: 256 | lm loss: 3.126401E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.276 | TFLOPs: 30.29 | +7: iteration 12900/ 21553 | 
consumed samples: 3302400 | consumed tokens: 6763315200 | elapsed time per iteration (s): 0.30 | learning rate: 8.368E-05 | global batch size: 256 | lm loss: 3.113042E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.426 | TFLOPs: 30.02 | +7: iteration 12910/ 21553 | consumed samples: 3304960 | consumed tokens: 6768558080 | elapsed time per iteration (s): 0.30 | learning rate: 8.355E-05 | global batch size: 256 | lm loss: 3.117208E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.025 | TFLOPs: 29.69 | +7: iteration 12920/ 21553 | consumed samples: 3307520 | consumed tokens: 6773800960 | elapsed time per iteration (s): 0.30 | learning rate: 8.343E-05 | global batch size: 256 | lm loss: 3.128596E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.274 | TFLOPs: 30.33 | +7: iteration 12930/ 21553 | consumed samples: 3310080 | consumed tokens: 6779043840 | elapsed time per iteration (s): 0.30 | learning rate: 8.330E-05 | global batch size: 256 | lm loss: 3.120181E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.430 | TFLOPs: 29.95 | +7: iteration 12940/ 21553 | consumed samples: 3312640 | consumed tokens: 6784286720 | elapsed time per iteration (s): 0.30 | learning rate: 8.317E-05 | global batch size: 256 | lm loss: 3.110139E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.164 | TFLOPs: 30.29 | +7: iteration 12950/ 21553 | consumed samples: 3315200 | consumed tokens: 6789529600 | elapsed time per iteration (s): 0.30 | learning rate: 8.305E-05 | global batch size: 256 | lm loss: 3.127354E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 867.766 | TFLOPs: 30.38 | +7: iteration 12960/ 21553 | consumed samples: 3317760 | consumed tokens: 6794772480 | elapsed time per iteration (s): 0.31 | learning rate: 8.292E-05 | global batch size: 256 | lm loss: 3.110998E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 814.976 | TFLOPs: 28.53 | +7: iteration 12970/ 21553 | consumed samples: 3320320 | consumed tokens: 6800015360 | elapsed time per iteration (s): 0.30 | learning rate: 8.279E-05 | global batch size: 256 | lm loss: 3.138717E+00 | grad norm: 0.459 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.097 | TFLOPs: 29.76 | +7: iteration 12980/ 21553 | consumed samples: 3322880 | consumed tokens: 6805258240 | elapsed time per iteration (s): 0.30 | learning rate: 8.267E-05 | global batch size: 256 | lm loss: 3.123336E+00 | grad norm: 0.469 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.270 | TFLOPs: 30.36 | +7: iteration 12990/ 21553 | consumed samples: 3325440 | consumed tokens: 6810501120 | elapsed time per iteration (s): 0.30 | learning rate: 8.254E-05 | global batch size: 256 | lm loss: 3.114506E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.595 | TFLOPs: 30.34 | +7: iteration 13000/ 21553 | consumed samples: 3328000 | consumed tokens: 6815744000 | elapsed time per iteration (s): 0.33 | learning rate: 8.242E-05 | global batch size: 256 | lm loss: 3.109088E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 783.541 | TFLOPs: 27.43 | +7: ------------------------------------------------------------------------------------------------ +7: validation loss at iteration 13000 | lm loss value: 3.823826E+00 | lm loss PPL: 4.577901E+01 
| +7: ------------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 13000 to checkpoints_146m14b100m +0: [2023-03-14 00:22:55,738] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step13000 is begin to save! +0: [2023-03-14 00:22:55,755] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_01-model_00-model_states.pt... +0: [2023-03-14 00:22:55,859] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_01-model_00-model_states.pt. +0: [2023-03-14 00:22:55,859] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_03-model_00-model_states.pt... +0: [2023-03-14 00:22:55,875] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_03-model_00-model_states.pt. +0: [2023-03-14 00:22:55,875] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_04-model_00-model_states.pt... +0: [2023-03-14 00:22:55,891] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_04-model_00-model_states.pt. +0: [2023-03-14 00:22:55,891] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_05-model_00-model_states.pt... +0: [2023-03-14 00:22:55,906] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_05-model_00-model_states.pt. +0: [2023-03-14 00:22:55,906] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_06-model_00-model_states.pt... +0: [2023-03-14 00:22:55,922] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_06-model_00-model_states.pt. 
+0: [2023-03-14 00:22:55,922] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_07-model_00-model_states.pt... +0: [2023-03-14 00:22:55,937] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_07-model_00-model_states.pt. +0: [2023-03-14 00:22:55,937] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_08-model_00-model_states.pt... +0: [2023-03-14 00:22:55,953] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_08-model_00-model_states.pt. +0: [2023-03-14 00:22:55,953] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_09-model_00-model_states.pt... +0: [2023-03-14 00:22:55,968] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_09-model_00-model_states.pt. +0: [2023-03-14 00:22:55,969] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_10-model_00-model_states.pt... +0: [2023-03-14 00:22:55,984] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_10-model_00-model_states.pt. +0: [2023-03-14 00:22:55,984] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_11-model_00-model_states.pt... +0: [2023-03-14 00:22:55,999] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_11-model_00-model_states.pt. +0: [2023-03-14 00:22:56,000] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_12-model_00-model_states.pt... +0: [2023-03-14 00:22:56,015] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_12-model_00-model_states.pt. 
+0: [2023-03-14 00:22:56,015] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_13-model_00-model_states.pt... +0: [2023-03-14 00:22:56,030] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_13-model_00-model_states.pt. +0: [2023-03-14 00:22:56,031] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_14-model_00-model_states.pt... +0: [2023-03-14 00:22:56,046] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_14-model_00-model_states.pt. +0: [2023-03-14 00:22:56,046] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_15-model_00-model_states.pt... +0: [2023-03-14 00:22:56,061] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_15-model_00-model_states.pt. +0: [2023-03-14 00:22:56,062] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_16-model_00-model_states.pt... +0: [2023-03-14 00:22:56,077] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_16-model_00-model_states.pt. +0: [2023-03-14 00:22:56,077] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_17-model_00-model_states.pt... +0: [2023-03-14 00:22:56,092] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_17-model_00-model_states.pt. +0: [2023-03-14 00:22:56,092] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/layer_19-model_00-model_states.pt... +0: [2023-03-14 00:22:56,093] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/layer_19-model_00-model_states.pt. 
+0: [2023-03-14 00:22:56,094] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step13000/mp_rank_00_model_states.pt +0: [2023-03-14 00:22:56,094] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/mp_rank_00_model_states.pt... +0: [2023-03-14 00:22:56,096] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/mp_rank_00_model_states.pt. +0: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... 
+1: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... 
+7: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... 
+7: [2023-03-14 00:22:56,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:22:56,149] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:22:56,149] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-14 00:22:56,149] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2023-03-14 00:22:56,151] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:22:56,151] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 00:22:56,151] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2023-03-14 00:22:56,152] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:22:56,152] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 00:22:56,152] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2023-03-14 00:22:56,152] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 00:22:56,153] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-14 00:22:56,152] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:22:56,153] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2023-03-14 00:22:56,153] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:22:56,153] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-14 00:22:56,153] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2023-03-14 00:22:56,153] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2023-03-14 00:22:56,153] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:22:56,153] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2023-03-14 00:22:56,153] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 00:22:56,153] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2023-03-14 00:22:56,154] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:22:56,154] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-14 00:22:56,154] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2023-03-14 00:22:56,154] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:22:56,154] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2023-03-14 00:22:56,154] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2023-03-14 00:22:56,154] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:22:56,154] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-14 00:22:56,154] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:22:56,154] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:22:56,154] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2023-03-14 00:22:56,154] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2023-03-14 00:22:56,154] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! 
+6: [2023-03-14 00:22:56,154] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:22:56,154] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2023-03-14 00:22:56,154] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2023-03-14 00:22:56,154] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 00:22:56,154] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2023-03-14 00:22:56,155] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:22:56,155] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3: [2023-03-14 00:22:56,155] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2023-03-14 00:22:56,156] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2023-03-14 00:22:56,155] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:22:56,156] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:22:56,156] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:22:56,156] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +1: [2023-03-14 00:22:56,156] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:22:56,156] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +6: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2023-03-14 00:22:56,156] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +2: [2023-03-14 00:22:56,156] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +6: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2023-03-14 00:22:56,156] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2023-03-14 00:22:56,157] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:22:56,157] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:22:56,157] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-14 00:22:56,157] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2023-03-14 00:22:56,157] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +2: [2023-03-14 00:22:56,157] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +3: [2023-03-14 00:22:56,159] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:22:56,159] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-14 00:22:56,159] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2023-03-14 00:22:56,159] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:22:56,160] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:22:56,160] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-14 00:22:56,160] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2023-03-14 00:22:56,160] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:22:56,160] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 00:22:56,160] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:22:56,161] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2023-03-14 00:22:56,162] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:22:56,162] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:22:56,162] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:22:56,162] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 00:22:56,162] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 00:22:56,162] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-14 00:22:56,162] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2023-03-14 00:22:56,162] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +1: [2023-03-14 00:22:56,162] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2023-03-14 00:22:56,160] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:22:56,160] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-14 00:22:56,160] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:22:56,160] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2023-03-14 00:22:56,160] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 00:22:56,160] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! 
+5: [2023-03-14 00:22:56,160] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:22:56,161] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:22:56,161] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2023-03-14 00:22:56,164] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:22:56,164] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:22:56,164] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:22:56,164] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-14 00:22:56,164] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 00:22:56,164] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 00:22:56,164] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2023-03-14 00:22:56,164] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2023-03-14 00:22:56,164] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +5: [2023-03-14 00:22:56,164] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:22:56,165] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-14 00:22:56,165] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:22:56,161] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-14 00:22:56,161] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2023-03-14 00:22:56,161] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-14 00:22:56,161] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2023-03-14 00:22:56,161] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2023-03-14 00:22:56,163] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:22:56,163] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:22:56,163] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:22:56,163] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:22:56,163] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-14 00:22:56,163] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-14 00:22:56,163] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-14 00:22:56,163] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-14 00:22:56,163] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2023-03-14 00:22:56,163] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2023-03-14 00:22:56,163] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +4: [2023-03-14 00:22:56,163] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2023-03-14 00:22:56,166] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:22:56,166] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:22:56,166] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:22:56,166] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-14 00:22:56,166] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-14 00:22:56,166] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 00:22:56,166] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2023-03-14 00:22:56,166] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2023-03-14 00:22:56,166] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +6: [2023-03-14 00:22:56,169] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:22:56,169] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 00:22:56,169] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2023-03-14 00:22:56,178] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:22:56,178] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 00:22:56,178] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! 
+0: [2023-03-14 00:22:56,179] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:22:56,179] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 00:22:56,179] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2023-03-14 00:22:56,179] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:22:56,179] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-14 00:22:56,179] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2023-03-14 00:22:56,179] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:22:56,179] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 00:22:56,179] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +0: [2023-03-14 00:22:56,185] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 00:22:56,185] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:22:56,210] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:22:56,210] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2023-03-14 00:22:56,210] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 00:22:56,210] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:22:56,210] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2023-03-14 00:22:56,210] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2023-03-14 00:22:56,210] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2023-03-14 00:22:56,210] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-14 00:22:56,210] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step13000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 00:22:56,211] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2023-03-14 00:22:56,211] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! +7: [2023-03-14 00:22:56,211] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step13000 is ready now! 
+0: successfully saved checkpoint at iteration 13000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 484.89 +7: iteration 13010/ 21553 | consumed samples: 3330560 | consumed tokens: 6820986880 | elapsed time per iteration (s): 0.38 | learning rate: 8.229E-05 | global batch size: 256 | lm loss: 3.118472E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 677.161 | TFLOPs: 23.71 | +7: iteration 13020/ 21553 | consumed samples: 3333120 | consumed tokens: 6826229760 | elapsed time per iteration (s): 0.33 | learning rate: 8.216E-05 | global batch size: 256 | lm loss: 3.121945E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 780.768 | TFLOPs: 27.33 | +7: iteration 13030/ 21553 | consumed samples: 3335680 | consumed tokens: 6831472640 | elapsed time per iteration (s): 0.30 | learning rate: 8.204E-05 | global batch size: 256 | lm loss: 3.104316E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.705 | TFLOPs: 29.78 | +7: iteration 13040/ 21553 | consumed samples: 3338240 | consumed tokens: 6836715520 | elapsed time per iteration (s): 0.30 | learning rate: 8.191E-05 | global batch size: 256 | lm loss: 3.122021E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.769 | TFLOPs: 30.34 | +7: iteration 13050/ 21553 | consumed samples: 3340800 | consumed tokens: 6841958400 | elapsed time per iteration (s): 0.30 | learning rate: 8.179E-05 | global batch size: 256 | lm loss: 3.113222E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.635 | TFLOPs: 30.34 | +7: iteration 13060/ 21553 | consumed samples: 3343360 | consumed tokens: 6847201280 | elapsed time per iteration (s): 0.30 | learning 
rate: 8.166E-05 | global batch size: 256 | lm loss: 3.116309E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.825 | TFLOPs: 29.65 | +7: iteration 13070/ 21553 | consumed samples: 3345920 | consumed tokens: 6852444160 | elapsed time per iteration (s): 0.32 | learning rate: 8.153E-05 | global batch size: 256 | lm loss: 3.126917E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 806.775 | TFLOPs: 28.24 | +7: iteration 13080/ 21553 | consumed samples: 3348480 | consumed tokens: 6857687040 | elapsed time per iteration (s): 0.31 | learning rate: 8.141E-05 | global batch size: 256 | lm loss: 3.120174E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.851 | TFLOPs: 29.26 | +7: iteration 13090/ 21553 | consumed samples: 3351040 | consumed tokens: 6862929920 | elapsed time per iteration (s): 0.30 | learning rate: 8.128E-05 | global batch size: 256 | lm loss: 3.128074E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.471 | TFLOPs: 30.09 | +7: iteration 13100/ 21553 | consumed samples: 3353600 | consumed tokens: 6868172800 | elapsed time per iteration (s): 0.35 | learning rate: 8.116E-05 | global batch size: 256 | lm loss: 3.117290E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 722.353 | TFLOPs: 25.29 | +7: iteration 13110/ 21553 | consumed samples: 3356160 | consumed tokens: 6873415680 | elapsed time per iteration (s): 0.31 | learning rate: 8.103E-05 | global batch size: 256 | lm loss: 3.116457E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 834.415 | TFLOPs: 29.21 | +7: iteration 13120/ 21553 | consumed samples: 
3358720 | consumed tokens: 6878658560 | elapsed time per iteration (s): 0.31 | learning rate: 8.091E-05 | global batch size: 256 | lm loss: 3.118108E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.594 | TFLOPs: 29.25 | +7: iteration 13130/ 21553 | consumed samples: 3361280 | consumed tokens: 6883901440 | elapsed time per iteration (s): 0.30 | learning rate: 8.078E-05 | global batch size: 256 | lm loss: 3.122098E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.071 | TFLOPs: 29.93 | +7: iteration 13140/ 21553 | consumed samples: 3363840 | consumed tokens: 6889144320 | elapsed time per iteration (s): 0.31 | learning rate: 8.066E-05 | global batch size: 256 | lm loss: 3.115784E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 838.887 | TFLOPs: 29.37 | +7: iteration 13150/ 21553 | consumed samples: 3366400 | consumed tokens: 6894387200 | elapsed time per iteration (s): 0.30 | learning rate: 8.053E-05 | global batch size: 256 | lm loss: 3.108983E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.152 | TFLOPs: 29.62 | +7: iteration 13160/ 21553 | consumed samples: 3368960 | consumed tokens: 6899630080 | elapsed time per iteration (s): 0.31 | learning rate: 8.041E-05 | global batch size: 256 | lm loss: 3.113407E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 827.369 | TFLOPs: 28.96 | +7: iteration 13170/ 21553 | consumed samples: 3371520 | consumed tokens: 6904872960 | elapsed time per iteration (s): 0.31 | learning rate: 8.028E-05 | global batch size: 256 | lm loss: 3.121722E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | 
samples per second: 830.831 | TFLOPs: 29.09 | +7: iteration 13180/ 21553 | consumed samples: 3374080 | consumed tokens: 6910115840 | elapsed time per iteration (s): 0.31 | learning rate: 8.016E-05 | global batch size: 256 | lm loss: 3.128020E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.041 | TFLOPs: 29.27 | +7: iteration 13190/ 21553 | consumed samples: 3376640 | consumed tokens: 6915358720 | elapsed time per iteration (s): 0.30 | learning rate: 8.003E-05 | global batch size: 256 | lm loss: 3.115975E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.385 | TFLOPs: 29.73 | +7: iteration 13200/ 21553 | consumed samples: 3379200 | consumed tokens: 6920601600 | elapsed time per iteration (s): 0.30 | learning rate: 7.991E-05 | global batch size: 256 | lm loss: 3.121934E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.965 | TFLOPs: 29.79 | +7: iteration 13210/ 21553 | consumed samples: 3381760 | consumed tokens: 6925844480 | elapsed time per iteration (s): 0.30 | learning rate: 7.978E-05 | global batch size: 256 | lm loss: 3.119106E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.391 | TFLOPs: 29.59 | +7: iteration 13220/ 21553 | consumed samples: 3384320 | consumed tokens: 6931087360 | elapsed time per iteration (s): 0.31 | learning rate: 7.966E-05 | global batch size: 256 | lm loss: 3.119577E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 819.242 | TFLOPs: 28.68 | +7: iteration 13230/ 21553 | consumed samples: 3386880 | consumed tokens: 6936330240 | elapsed time per iteration (s): 0.30 | learning rate: 7.953E-05 | global batch size: 256 | lm loss: 3.110598E+00 | grad norm: 
0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.512 | TFLOPs: 29.53 | +7: iteration 13240/ 21553 | consumed samples: 3389440 | consumed tokens: 6941573120 | elapsed time per iteration (s): 0.30 | learning rate: 7.941E-05 | global batch size: 256 | lm loss: 3.112638E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.459 | TFLOPs: 29.70 | +7: iteration 13250/ 21553 | consumed samples: 3392000 | consumed tokens: 6946816000 | elapsed time per iteration (s): 0.30 | learning rate: 7.928E-05 | global batch size: 256 | lm loss: 3.114635E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.979 | TFLOPs: 29.48 | +7: iteration 13260/ 21553 | consumed samples: 3394560 | consumed tokens: 6952058880 | elapsed time per iteration (s): 0.30 | learning rate: 7.916E-05 | global batch size: 256 | lm loss: 3.102215E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.268 | TFLOPs: 29.73 | +7: iteration 13270/ 21553 | consumed samples: 3397120 | consumed tokens: 6957301760 | elapsed time per iteration (s): 0.31 | learning rate: 7.903E-05 | global batch size: 256 | lm loss: 3.126796E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.201 | TFLOPs: 29.27 | +7: iteration 13280/ 21553 | consumed samples: 3399680 | consumed tokens: 6962544640 | elapsed time per iteration (s): 0.31 | learning rate: 7.891E-05 | global batch size: 256 | lm loss: 3.104069E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.100 | TFLOPs: 29.09 | +7: iteration 13290/ 21553 | consumed samples: 3402240 | consumed tokens: 6967787520 | elapsed time per iteration (s): 0.30 
| learning rate: 7.878E-05 | global batch size: 256 | lm loss: 3.104269E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.779 | TFLOPs: 29.61 | +7: iteration 13300/ 21553 | consumed samples: 3404800 | consumed tokens: 6973030400 | elapsed time per iteration (s): 0.30 | learning rate: 7.866E-05 | global batch size: 256 | lm loss: 3.116162E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.769 | TFLOPs: 29.47 | +7: iteration 13310/ 21553 | consumed samples: 3407360 | consumed tokens: 6978273280 | elapsed time per iteration (s): 0.30 | learning rate: 7.854E-05 | global batch size: 256 | lm loss: 3.102179E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.638 | TFLOPs: 30.02 | +7: iteration 13320/ 21553 | consumed samples: 3409920 | consumed tokens: 6983516160 | elapsed time per iteration (s): 0.30 | learning rate: 7.841E-05 | global batch size: 256 | lm loss: 3.114018E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.259 | TFLOPs: 29.49 | +7: iteration 13330/ 21553 | consumed samples: 3412480 | consumed tokens: 6988759040 | elapsed time per iteration (s): 0.30 | learning rate: 7.829E-05 | global batch size: 256 | lm loss: 3.115928E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.846 | TFLOPs: 30.07 | +7: iteration 13340/ 21553 | consumed samples: 3415040 | consumed tokens: 6994001920 | elapsed time per iteration (s): 0.31 | learning rate: 7.816E-05 | global batch size: 256 | lm loss: 3.114907E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.022 | TFLOPs: 29.23 | +7: iteration 13350/ 21553 | 
consumed samples: 3417600 | consumed tokens: 6999244800 | elapsed time per iteration (s): 0.30 | learning rate: 7.804E-05 | global batch size: 256 | lm loss: 3.109758E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.455 | TFLOPs: 29.98 | +7: iteration 13360/ 21553 | consumed samples: 3420160 | consumed tokens: 7004487680 | elapsed time per iteration (s): 0.30 | learning rate: 7.792E-05 | global batch size: 256 | lm loss: 3.105438E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.118 | TFLOPs: 29.97 | +7: iteration 13370/ 21553 | consumed samples: 3422720 | consumed tokens: 7009730560 | elapsed time per iteration (s): 0.31 | learning rate: 7.779E-05 | global batch size: 256 | lm loss: 3.099713E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.911 | TFLOPs: 29.05 | +7: iteration 13380/ 21553 | consumed samples: 3425280 | consumed tokens: 7014973440 | elapsed time per iteration (s): 0.30 | learning rate: 7.767E-05 | global batch size: 256 | lm loss: 3.105873E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.976 | TFLOPs: 29.76 | +7: iteration 13390/ 21553 | consumed samples: 3427840 | consumed tokens: 7020216320 | elapsed time per iteration (s): 0.31 | learning rate: 7.754E-05 | global batch size: 256 | lm loss: 3.111739E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 828.723 | TFLOPs: 29.01 | +7: iteration 13400/ 21553 | consumed samples: 3430400 | consumed tokens: 7025459200 | elapsed time per iteration (s): 0.30 | learning rate: 7.742E-05 | global batch size: 256 | lm loss: 3.100306E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 846.848 | TFLOPs: 29.65 | +7: iteration 13410/ 21553 | consumed samples: 3432960 | consumed tokens: 7030702080 | elapsed time per iteration (s): 0.30 | learning rate: 7.730E-05 | global batch size: 256 | lm loss: 3.110838E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.200 | TFLOPs: 30.11 | +7: iteration 13420/ 21553 | consumed samples: 3435520 | consumed tokens: 7035944960 | elapsed time per iteration (s): 0.30 | learning rate: 7.717E-05 | global batch size: 256 | lm loss: 3.101358E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.936 | TFLOPs: 29.75 | +7: iteration 13430/ 21553 | consumed samples: 3438080 | consumed tokens: 7041187840 | elapsed time per iteration (s): 0.30 | learning rate: 7.705E-05 | global batch size: 256 | lm loss: 3.118618E+00 | grad norm: 0.470 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.036 | TFLOPs: 29.65 | +7: iteration 13440/ 21553 | consumed samples: 3440640 | consumed tokens: 7046430720 | elapsed time per iteration (s): 0.31 | learning rate: 7.693E-05 | global batch size: 256 | lm loss: 3.112722E+00 | grad norm: 0.401 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.250 | TFLOPs: 29.13 | +7: iteration 13450/ 21553 | consumed samples: 3443200 | consumed tokens: 7051673600 | elapsed time per iteration (s): 0.30 | learning rate: 7.680E-05 | global batch size: 256 | lm loss: 3.113084E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.401 | TFLOPs: 29.81 | +7: iteration 13460/ 21553 | consumed samples: 3445760 | consumed tokens: 7056916480 | elapsed time per iteration (s): 0.30 | learning rate: 7.668E-05 | global batch size: 256 | lm loss: 
3.113973E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.186 | TFLOPs: 29.97 | +7: iteration 13470/ 21553 | consumed samples: 3448320 | consumed tokens: 7062159360 | elapsed time per iteration (s): 0.30 | learning rate: 7.656E-05 | global batch size: 256 | lm loss: 3.122173E+00 | grad norm: 0.497 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.515 | TFLOPs: 29.98 | +7: iteration 13480/ 21553 | consumed samples: 3450880 | consumed tokens: 7067402240 | elapsed time per iteration (s): 0.30 | learning rate: 7.644E-05 | global batch size: 256 | lm loss: 3.111234E+00 | grad norm: 0.468 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.184 | TFLOPs: 29.41 | +7: iteration 13490/ 21553 | consumed samples: 3453440 | consumed tokens: 7072645120 | elapsed time per iteration (s): 0.30 | learning rate: 7.631E-05 | global batch size: 256 | lm loss: 3.101080E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.683 | TFLOPs: 29.61 | +7: iteration 13500/ 21553 | consumed samples: 3456000 | consumed tokens: 7077888000 | elapsed time per iteration (s): 0.30 | learning rate: 7.619E-05 | global batch size: 256 | lm loss: 3.107789E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.525 | TFLOPs: 29.56 | +7: iteration 13510/ 21553 | consumed samples: 3458560 | consumed tokens: 7083130880 | elapsed time per iteration (s): 0.30 | learning rate: 7.607E-05 | global batch size: 256 | lm loss: 3.119090E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.791 | TFLOPs: 29.50 | +7: iteration 13520/ 21553 | consumed samples: 3461120 | consumed tokens: 7088373760 | elapsed 
time per iteration (s): 0.30 | learning rate: 7.594E-05 | global batch size: 256 | lm loss: 3.105417E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.434 | TFLOPs: 29.42 | +7: iteration 13530/ 21553 | consumed samples: 3463680 | consumed tokens: 7093616640 | elapsed time per iteration (s): 0.30 | learning rate: 7.582E-05 | global batch size: 256 | lm loss: 3.114570E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.759 | TFLOPs: 29.50 | +7: iteration 13540/ 21553 | consumed samples: 3466240 | consumed tokens: 7098859520 | elapsed time per iteration (s): 0.30 | learning rate: 7.570E-05 | global batch size: 256 | lm loss: 3.094288E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.437 | TFLOPs: 29.63 | +7: iteration 13550/ 21553 | consumed samples: 3468800 | consumed tokens: 7104102400 | elapsed time per iteration (s): 0.30 | learning rate: 7.558E-05 | global batch size: 256 | lm loss: 3.102041E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.958 | TFLOPs: 29.79 | +7: iteration 13560/ 21553 | consumed samples: 3471360 | consumed tokens: 7109345280 | elapsed time per iteration (s): 0.30 | learning rate: 7.545E-05 | global batch size: 256 | lm loss: 3.113065E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.202 | TFLOPs: 30.32 | +7: iteration 13570/ 21553 | consumed samples: 3473920 | consumed tokens: 7114588160 | elapsed time per iteration (s): 0.30 | learning rate: 7.533E-05 | global batch size: 256 | lm loss: 3.093954E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.021 | TFLOPs: 29.58 | +7: 
iteration 13580/ 21553 | consumed samples: 3476480 | consumed tokens: 7119831040 | elapsed time per iteration (s): 0.30 | learning rate: 7.521E-05 | global batch size: 256 | lm loss: 3.116001E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.537 | TFLOPs: 29.84 | +7: iteration 13590/ 21553 | consumed samples: 3479040 | consumed tokens: 7125073920 | elapsed time per iteration (s): 0.30 | learning rate: 7.509E-05 | global batch size: 256 | lm loss: 3.111683E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.561 | TFLOPs: 29.64 | +7: iteration 13600/ 21553 | consumed samples: 3481600 | consumed tokens: 7130316800 | elapsed time per iteration (s): 0.31 | learning rate: 7.497E-05 | global batch size: 256 | lm loss: 3.105294E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.457 | TFLOPs: 29.07 | +7: iteration 13610/ 21553 | consumed samples: 3484160 | consumed tokens: 7135559680 | elapsed time per iteration (s): 0.31 | learning rate: 7.484E-05 | global batch size: 256 | lm loss: 3.120020E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.215 | TFLOPs: 29.03 | +7: iteration 13620/ 21553 | consumed samples: 3486720 | consumed tokens: 7140802560 | elapsed time per iteration (s): 0.31 | learning rate: 7.472E-05 | global batch size: 256 | lm loss: 3.100038E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 821.901 | TFLOPs: 28.77 | +7: iteration 13630/ 21553 | consumed samples: 3489280 | consumed tokens: 7146045440 | elapsed time per iteration (s): 0.30 | learning rate: 7.460E-05 | global batch size: 256 | lm loss: 3.112320E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 845.318 | TFLOPs: 29.59 | +7: iteration 13640/ 21553 | consumed samples: 3491840 | consumed tokens: 7151288320 | elapsed time per iteration (s): 0.30 | learning rate: 7.448E-05 | global batch size: 256 | lm loss: 3.100698E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.900 | TFLOPs: 29.58 | +7: iteration 13650/ 21553 | consumed samples: 3494400 | consumed tokens: 7156531200 | elapsed time per iteration (s): 0.30 | learning rate: 7.436E-05 | global batch size: 256 | lm loss: 3.104544E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.118 | TFLOPs: 30.08 | +7: iteration 13660/ 21553 | consumed samples: 3496960 | consumed tokens: 7161774080 | elapsed time per iteration (s): 0.30 | learning rate: 7.423E-05 | global batch size: 256 | lm loss: 3.101511E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.061 | TFLOPs: 29.58 | +7: iteration 13670/ 21553 | consumed samples: 3499520 | consumed tokens: 7167016960 | elapsed time per iteration (s): 0.30 | learning rate: 7.411E-05 | global batch size: 256 | lm loss: 3.101569E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.269 | TFLOPs: 29.42 | +7: iteration 13680/ 21553 | consumed samples: 3502080 | consumed tokens: 7172259840 | elapsed time per iteration (s): 0.31 | learning rate: 7.399E-05 | global batch size: 256 | lm loss: 3.110677E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 826.249 | TFLOPs: 28.92 | +7: iteration 13690/ 21553 | consumed samples: 3504640 | consumed tokens: 7177502720 | elapsed time per iteration (s): 0.30 | learning rate: 7.387E-05 | global batch 
size: 256 | lm loss: 3.119024E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.615 | TFLOPs: 29.95 | +7: iteration 13700/ 21553 | consumed samples: 3507200 | consumed tokens: 7182745600 | elapsed time per iteration (s): 0.31 | learning rate: 7.375E-05 | global batch size: 256 | lm loss: 3.116948E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 836.547 | TFLOPs: 29.29 | +7: iteration 13710/ 21553 | consumed samples: 3509760 | consumed tokens: 7187988480 | elapsed time per iteration (s): 0.30 | learning rate: 7.363E-05 | global batch size: 256 | lm loss: 3.107349E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 845.636 | TFLOPs: 29.60 | +7: iteration 13720/ 21553 | consumed samples: 3512320 | consumed tokens: 7193231360 | elapsed time per iteration (s): 0.30 | learning rate: 7.351E-05 | global batch size: 256 | lm loss: 3.103393E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.052 | TFLOPs: 29.62 | +7: iteration 13730/ 21553 | consumed samples: 3514880 | consumed tokens: 7198474240 | elapsed time per iteration (s): 0.30 | learning rate: 7.339E-05 | global batch size: 256 | lm loss: 3.111102E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.142 | TFLOPs: 29.41 | +7: iteration 13740/ 21553 | consumed samples: 3517440 | consumed tokens: 7203717120 | elapsed time per iteration (s): 0.30 | learning rate: 7.326E-05 | global batch size: 256 | lm loss: 3.097953E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.351 | TFLOPs: 30.12 | +7: iteration 13750/ 21553 | consumed samples: 3520000 | consumed tokens: 
7208960000 | elapsed time per iteration (s): 0.30 | learning rate: 7.314E-05 | global batch size: 256 | lm loss: 3.098427E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.403 | TFLOPs: 30.05 | +7: iteration 13760/ 21553 | consumed samples: 3522560 | consumed tokens: 7214202880 | elapsed time per iteration (s): 0.30 | learning rate: 7.302E-05 | global batch size: 256 | lm loss: 3.115556E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.154 | TFLOPs: 29.62 | +7: iteration 13770/ 21553 | consumed samples: 3525120 | consumed tokens: 7219445760 | elapsed time per iteration (s): 0.30 | learning rate: 7.290E-05 | global batch size: 256 | lm loss: 3.101970E+00 | grad norm: 0.464 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.594 | TFLOPs: 29.57 | +7: iteration 13780/ 21553 | consumed samples: 3527680 | consumed tokens: 7224688640 | elapsed time per iteration (s): 0.30 | learning rate: 7.278E-05 | global batch size: 256 | lm loss: 3.107607E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.870 | TFLOPs: 30.21 | +7: iteration 13790/ 21553 | consumed samples: 3530240 | consumed tokens: 7229931520 | elapsed time per iteration (s): 0.30 | learning rate: 7.266E-05 | global batch size: 256 | lm loss: 3.097723E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.040 | TFLOPs: 30.04 | +7: iteration 13800/ 21553 | consumed samples: 3532800 | consumed tokens: 7235174400 | elapsed time per iteration (s): 0.31 | learning rate: 7.254E-05 | global batch size: 256 | lm loss: 3.084431E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.927 | 
TFLOPs: 29.12 | +7: iteration 13810/ 21553 | consumed samples: 3535360 | consumed tokens: 7240417280 | elapsed time per iteration (s): 0.30 | learning rate: 7.242E-05 | global batch size: 256 | lm loss: 3.106813E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.240 | TFLOPs: 29.90 | +7: iteration 13820/ 21553 | consumed samples: 3537920 | consumed tokens: 7245660160 | elapsed time per iteration (s): 0.30 | learning rate: 7.230E-05 | global batch size: 256 | lm loss: 3.103725E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.722 | TFLOPs: 29.71 | +7: iteration 13830/ 21553 | consumed samples: 3540480 | consumed tokens: 7250903040 | elapsed time per iteration (s): 0.30 | learning rate: 7.218E-05 | global batch size: 256 | lm loss: 3.110622E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.210 | TFLOPs: 29.73 | +7: iteration 13840/ 21553 | consumed samples: 3543040 | consumed tokens: 7256145920 | elapsed time per iteration (s): 0.31 | learning rate: 7.206E-05 | global batch size: 256 | lm loss: 3.091841E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.164 | TFLOPs: 29.31 | +7: iteration 13850/ 21553 | consumed samples: 3545600 | consumed tokens: 7261388800 | elapsed time per iteration (s): 0.30 | learning rate: 7.194E-05 | global batch size: 256 | lm loss: 3.100274E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.406 | TFLOPs: 29.46 | +7: iteration 13860/ 21553 | consumed samples: 3548160 | consumed tokens: 7266631680 | elapsed time per iteration (s): 0.31 | learning rate: 7.182E-05 | global batch size: 256 | lm loss: 3.098877E+00 | grad norm: 0.425 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 832.033 | TFLOPs: 29.13 | +7: iteration 13870/ 21553 | consumed samples: 3550720 | consumed tokens: 7271874560 | elapsed time per iteration (s): 0.30 | learning rate: 7.170E-05 | global batch size: 256 | lm loss: 3.099290E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.956 | TFLOPs: 29.44 | +7: iteration 13880/ 21553 | consumed samples: 3553280 | consumed tokens: 7277117440 | elapsed time per iteration (s): 0.30 | learning rate: 7.158E-05 | global batch size: 256 | lm loss: 3.099704E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.377 | TFLOPs: 29.84 | +7: iteration 13890/ 21553 | consumed samples: 3555840 | consumed tokens: 7282360320 | elapsed time per iteration (s): 0.30 | learning rate: 7.146E-05 | global batch size: 256 | lm loss: 3.090524E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.653 | TFLOPs: 29.67 | +7: iteration 13900/ 21553 | consumed samples: 3558400 | consumed tokens: 7287603200 | elapsed time per iteration (s): 0.31 | learning rate: 7.134E-05 | global batch size: 256 | lm loss: 3.098005E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 831.863 | TFLOPs: 29.12 | +7: iteration 13910/ 21553 | consumed samples: 3560960 | consumed tokens: 7292846080 | elapsed time per iteration (s): 0.31 | learning rate: 7.122E-05 | global batch size: 256 | lm loss: 3.099431E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 818.590 | TFLOPs: 28.66 | +7: iteration 13920/ 21553 | consumed samples: 3563520 | consumed tokens: 7298088960 | elapsed time per iteration (s): 0.30 | learning rate: 
7.110E-05 | global batch size: 256 | lm loss: 3.093059E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.317 | TFLOPs: 29.42 | +7: iteration 13930/ 21553 | consumed samples: 3566080 | consumed tokens: 7303331840 | elapsed time per iteration (s): 0.30 | learning rate: 7.098E-05 | global batch size: 256 | lm loss: 3.100277E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.404 | TFLOPs: 29.81 | +7: iteration 13940/ 21553 | consumed samples: 3568640 | consumed tokens: 7308574720 | elapsed time per iteration (s): 0.30 | learning rate: 7.086E-05 | global batch size: 256 | lm loss: 3.108388E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.401 | TFLOPs: 29.67 | +7: iteration 13950/ 21553 | consumed samples: 3571200 | consumed tokens: 7313817600 | elapsed time per iteration (s): 0.30 | learning rate: 7.074E-05 | global batch size: 256 | lm loss: 3.086045E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.083 | TFLOPs: 29.90 | +7: iteration 13960/ 21553 | consumed samples: 3573760 | consumed tokens: 7319060480 | elapsed time per iteration (s): 0.30 | learning rate: 7.062E-05 | global batch size: 256 | lm loss: 3.095609E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.500 | TFLOPs: 30.33 | +7: iteration 13970/ 21553 | consumed samples: 3576320 | consumed tokens: 7324303360 | elapsed time per iteration (s): 0.30 | learning rate: 7.050E-05 | global batch size: 256 | lm loss: 3.095409E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 846.125 | TFLOPs: 29.62 | +7: iteration 13980/ 21553 | consumed samples: 
3578880 | consumed tokens: 7329546240 | elapsed time per iteration (s): 0.31 | learning rate: 7.038E-05 | global batch size: 256 | lm loss: 3.100316E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.261 | TFLOPs: 29.17 | +7: iteration 13990/ 21553 | consumed samples: 3581440 | consumed tokens: 7334789120 | elapsed time per iteration (s): 0.30 | learning rate: 7.027E-05 | global batch size: 256 | lm loss: 3.104810E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.873 | TFLOPs: 29.47 | +0: [2023-03-14 00:28:00,316] [INFO] [logging.py:68:log_dist] [Rank 0] step=14000, skipped=0, lr=[7.014654688611906e-05, 7.014654688611906e-05, 7.014654688611906e-05], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 14000/ 21553 | consumed samples: 3584000 | consumed tokens: 7340032000 | elapsed time per iteration (s): 0.31 | learning rate: 7.015E-05 | global batch size: 256 | lm loss: 3.091780E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.917 | TFLOPs: 29.33 | +0: steps: 14000 loss: 3.0628 iter time (s): 0.298 samples/sec: 858.182 +7: ------------------------------------------------------------------------------------------------ +7: validation loss at iteration 14000 | lm loss value: 3.768395E+00 | lm loss PPL: 4.331048E+01 | +7: ------------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 14000 to checkpoints_146m14b100m +0: [2023-03-14 00:28:00,485] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step14000 is begin to save! +0: [2023-03-14 00:28:00,489] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_01-model_00-model_states.pt... 
+0: [2023-03-14 00:28:00,585] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_01-model_00-model_states.pt. +0: [2023-03-14 00:28:00,586] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_03-model_00-model_states.pt... +0: [2023-03-14 00:28:00,601] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_03-model_00-model_states.pt. +0: [2023-03-14 00:28:00,601] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_04-model_00-model_states.pt... +0: [2023-03-14 00:28:00,616] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_04-model_00-model_states.pt. +0: [2023-03-14 00:28:00,617] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_05-model_00-model_states.pt... +0: [2023-03-14 00:28:00,631] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_05-model_00-model_states.pt. +0: [2023-03-14 00:28:00,632] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_06-model_00-model_states.pt... +0: [2023-03-14 00:28:00,647] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_06-model_00-model_states.pt. +0: [2023-03-14 00:28:00,647] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_07-model_00-model_states.pt... +0: [2023-03-14 00:28:00,662] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_07-model_00-model_states.pt. +0: [2023-03-14 00:28:00,662] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_08-model_00-model_states.pt... 
+0: [2023-03-14 00:28:00,677] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_08-model_00-model_states.pt. +0: [2023-03-14 00:28:00,677] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_09-model_00-model_states.pt... +0: [2023-03-14 00:28:00,692] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_09-model_00-model_states.pt. +0: [2023-03-14 00:28:00,693] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_10-model_00-model_states.pt... +0: [2023-03-14 00:28:00,708] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_10-model_00-model_states.pt. +0: [2023-03-14 00:28:00,708] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_11-model_00-model_states.pt... +0: [2023-03-14 00:28:00,723] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_11-model_00-model_states.pt. +0: [2023-03-14 00:28:00,723] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_12-model_00-model_states.pt... +0: [2023-03-14 00:28:00,738] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_12-model_00-model_states.pt. +0: [2023-03-14 00:28:00,738] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_13-model_00-model_states.pt... +0: [2023-03-14 00:28:00,753] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_13-model_00-model_states.pt. +0: [2023-03-14 00:28:00,754] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_14-model_00-model_states.pt... 
+0: [2023-03-14 00:28:00,769] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_14-model_00-model_states.pt. +0: [2023-03-14 00:28:00,769] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_15-model_00-model_states.pt... +0: [2023-03-14 00:28:00,784] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_15-model_00-model_states.pt. +0: [2023-03-14 00:28:00,784] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_16-model_00-model_states.pt... +0: [2023-03-14 00:28:00,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_16-model_00-model_states.pt. +0: [2023-03-14 00:28:00,799] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_17-model_00-model_states.pt... +0: [2023-03-14 00:28:00,814] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_17-model_00-model_states.pt. +0: [2023-03-14 00:28:00,815] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/layer_19-model_00-model_states.pt... +0: [2023-03-14 00:28:00,816] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/layer_19-model_00-model_states.pt. +0: [2023-03-14 00:28:00,816] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step14000/mp_rank_00_model_states.pt +0: [2023-03-14 00:28:00,816] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/mp_rank_00_model_states.pt... +0: [2023-03-14 00:28:00,818] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/mp_rank_00_model_states.pt. 
+0: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... 
+3: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:28:00,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:28:00,869] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:28:00,870] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:28:00,870] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 00:28:00,870] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2023-03-14 00:28:00,872] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:28:00,872] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 00:28:00,872] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2023-03-14 00:28:00,872] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:28:00,872] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:28:00,873] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 00:28:00,873] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:28:00,873] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 00:28:00,873] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 00:28:00,873] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-14 00:28:00,873] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-14 00:28:00,873] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 00:28:00,873] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved 
checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2023-03-14 00:28:00,873] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +5: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2023-03-14 00:28:00,873] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2023-03-14 00:28:00,874] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:28:00,874] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-14 00:28:00,874] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! 
+3: [2023-03-14 00:28:00,874] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:28:00,874] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2023-03-14 00:28:00,874] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2023-03-14 00:28:00,875] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:28:00,875] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:28:00,876] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-14 00:28:00,876] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-14 00:28:00,876] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2023-03-14 00:28:00,876] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2023-03-14 00:28:00,876] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:28:00,876] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-14 00:28:00,876] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! 
+3: [2023-03-14 00:28:00,878] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:28:00,879] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-14 00:28:00,879] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2023-03-14 00:28:00,879] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:28:00,879] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-14 00:28:00,879] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:28:00,881] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-14 00:28:00,881] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-14 00:28:00,881] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2023-03-14 00:28:00,881] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! 
+4: [2023-03-14 00:28:00,881] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-14 00:28:00,881] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-14 00:28:00,881] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2023-03-14 00:28:00,879] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:28:00,879] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-14 00:28:00,879] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! 
+4: [2023-03-14 00:28:00,881] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-14 00:28:00,881] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +5: [2023-03-14 00:28:00,885] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:28:00,885] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-14 00:28:00,885] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2023-03-14 00:28:00,890] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:28:00,890] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 00:28:00,890] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2023-03-14 00:28:00,890] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:28:00,890] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 00:28:00,890] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2023-03-14 00:28:00,891] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:28:00,891] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 00:28:00,891] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2023-03-14 00:28:00,892] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:28:00,892] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 00:28:00,892] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2023-03-14 00:28:00,893] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:28:00,893] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:28:00,893] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:28:00,893] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:28:00,893] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 00:28:00,893] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-14 00:28:00,893] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-14 00:28:00,893] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 00:28:00,893] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2023-03-14 00:28:00,893] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2023-03-14 00:28:00,893] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +6: [2023-03-14 00:28:00,893] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2023-03-14 00:28:00,890] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:28:00,890] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:28:00,890] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 00:28:00,890] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-14 00:28:00,890] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2023-03-14 00:28:00,890] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2023-03-14 00:28:00,890] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2023-03-14 00:28:00,890] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +3: [2023-03-14 00:28:00,890] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2023-03-14 00:28:00,895] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:28:00,895] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 00:28:00,895] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2023-03-14 00:28:00,895] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:28:00,895] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 00:28:00,896] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! 
+1: [2023-03-14 00:28:00,896] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:28:00,896] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 00:28:00,896] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2023-03-14 00:28:00,897] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:28:00,897] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-14 00:28:00,897] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: [2023-03-14 00:28:00,898] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 00:28:00,898] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2023-03-14 00:28:00,901] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:28:00,901] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:28:00,901] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:28:00,901] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:28:00,901] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 00:28:00,901] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 00:28:00,901] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-14 00:28:00,901] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-14 00:28:00,901] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2023-03-14 00:28:00,901] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2023-03-14 00:28:00,901] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +1: [2023-03-14 00:28:00,901] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2023-03-14 00:28:00,904] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:28:00,905] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-14 00:28:00,905] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 00:28:00,905] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-14 00:28:00,905] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 00:28:00,905] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 00:28:00,905] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved 
checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 00:28:00,905] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-14 00:28:00,905] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +7: [2023-03-14 00:28:00,905] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:28:00,911] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-14 00:28:00,911] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2023-03-14 00:28:00,911] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-14 00:28:00,911] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2023-03-14 00:28:00,911] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-14 00:28:00,911] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-14 00:28:00,911] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2023-03-14 00:28:00,911] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step14000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! 
+2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2023-03-14 00:28:00,911] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2023-03-14 00:28:00,912] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +2: [2023-03-14 00:28:00,912] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step14000 is ready now! +0: successfully saved checkpoint at iteration 14000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 480.61 +7: iteration 14010/ 21553 | consumed samples: 3586560 | consumed tokens: 7345274880 | elapsed time per iteration (s): 0.36 | learning rate: 7.003E-05 | global batch size: 256 | lm loss: 3.107754E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 710.291 | TFLOPs: 24.87 | +7: iteration 14020/ 21553 | consumed samples: 3589120 | consumed tokens: 7350517760 | elapsed time per iteration (s): 0.30 | learning rate: 6.991E-05 | global batch size: 256 | lm loss: 3.104354E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.273 | TFLOPs: 30.19 | +7: iteration 14030/ 21553 | consumed samples: 3591680 | consumed tokens: 7355760640 | elapsed time per iteration (s): 0.31 | learning rate: 6.979E-05 | global batch size: 256 | lm loss: 3.093493E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 833.949 | TFLOPs: 29.19 | +7: iteration 14040/ 21553 | consumed samples: 3594240 | consumed tokens: 7361003520 | elapsed time per iteration (s): 0.30 | learning rate: 6.967E-05 | global batch size: 256 | lm loss: 3.088917E+00 | 
grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.110 | TFLOPs: 29.87 | +7: iteration 14050/ 21553 | consumed samples: 3596800 | consumed tokens: 7366246400 | elapsed time per iteration (s): 0.30 | learning rate: 6.955E-05 | global batch size: 256 | lm loss: 3.081671E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.335 | TFLOPs: 29.73 | +7: iteration 14060/ 21553 | consumed samples: 3599360 | consumed tokens: 7371489280 | elapsed time per iteration (s): 0.31 | learning rate: 6.944E-05 | global batch size: 256 | lm loss: 3.079821E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.655 | TFLOPs: 29.04 | +7: iteration 14070/ 21553 | consumed samples: 3601920 | consumed tokens: 7376732160 | elapsed time per iteration (s): 0.30 | learning rate: 6.932E-05 | global batch size: 256 | lm loss: 3.108925E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 844.010 | TFLOPs: 29.55 | +7: iteration 14080/ 21553 | consumed samples: 3604480 | consumed tokens: 7381975040 | elapsed time per iteration (s): 0.30 | learning rate: 6.920E-05 | global batch size: 256 | lm loss: 3.091294E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.006 | TFLOPs: 30.07 | +7: iteration 14090/ 21553 | consumed samples: 3607040 | consumed tokens: 7387217920 | elapsed time per iteration (s): 0.30 | learning rate: 6.908E-05 | global batch size: 256 | lm loss: 3.104967E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.372 | TFLOPs: 29.91 | +7: iteration 14100/ 21553 | consumed samples: 3609600 | consumed tokens: 7392460800 | elapsed time per iteration 
(s): 0.30 | learning rate: 6.896E-05 | global batch size: 256 | lm loss: 3.096219E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.451 | TFLOPs: 29.77 | +7: iteration 14110/ 21553 | consumed samples: 3612160 | consumed tokens: 7397703680 | elapsed time per iteration (s): 0.30 | learning rate: 6.884E-05 | global batch size: 256 | lm loss: 3.074944E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.962 | TFLOPs: 29.72 | +7: iteration 14120/ 21553 | consumed samples: 3614720 | consumed tokens: 7402946560 | elapsed time per iteration (s): 0.30 | learning rate: 6.873E-05 | global batch size: 256 | lm loss: 3.097335E+00 | grad norm: 0.476 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.270 | TFLOPs: 29.66 | +7: iteration 14130/ 21553 | consumed samples: 3617280 | consumed tokens: 7408189440 | elapsed time per iteration (s): 0.32 | learning rate: 6.861E-05 | global batch size: 256 | lm loss: 3.085096E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 799.213 | TFLOPs: 27.98 | +7: iteration 14140/ 21553 | consumed samples: 3619840 | consumed tokens: 7413432320 | elapsed time per iteration (s): 0.33 | learning rate: 6.849E-05 | global batch size: 256 | lm loss: 3.093742E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 786.153 | TFLOPs: 27.52 | +7: iteration 14150/ 21553 | consumed samples: 3622400 | consumed tokens: 7418675200 | elapsed time per iteration (s): 0.30 | learning rate: 6.837E-05 | global batch size: 256 | lm loss: 3.097494E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.691 | TFLOPs: 29.78 | +7: iteration 14160/ 
21553 | consumed samples: 3624960 | consumed tokens: 7423918080 | elapsed time per iteration (s): 0.30 | learning rate: 6.826E-05 | global batch size: 256 | lm loss: 3.090986E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.676 | TFLOPs: 30.34 | +7: iteration 14170/ 21553 | consumed samples: 3627520 | consumed tokens: 7429160960 | elapsed time per iteration (s): 0.30 | learning rate: 6.814E-05 | global batch size: 256 | lm loss: 3.096689E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.905 | TFLOPs: 30.00 | +7: iteration 14180/ 21553 | consumed samples: 3630080 | consumed tokens: 7434403840 | elapsed time per iteration (s): 0.30 | learning rate: 6.802E-05 | global batch size: 256 | lm loss: 3.092388E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.234 | TFLOPs: 30.29 | +7: iteration 14190/ 21553 | consumed samples: 3632640 | consumed tokens: 7439646720 | elapsed time per iteration (s): 0.30 | learning rate: 6.791E-05 | global batch size: 256 | lm loss: 3.096672E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.286 | TFLOPs: 30.22 | +7: iteration 14200/ 21553 | consumed samples: 3635200 | consumed tokens: 7444889600 | elapsed time per iteration (s): 0.30 | learning rate: 6.779E-05 | global batch size: 256 | lm loss: 3.101657E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.099 | TFLOPs: 30.28 | +7: iteration 14210/ 21553 | consumed samples: 3637760 | consumed tokens: 7450132480 | elapsed time per iteration (s): 0.30 | learning rate: 6.767E-05 | global batch size: 256 | lm loss: 3.098107E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number 
of nan iterations: 0 | samples per second: 865.521 | TFLOPs: 30.30 | +7: iteration 14220/ 21553 | consumed samples: 3640320 | consumed tokens: 7455375360 | elapsed time per iteration (s): 0.30 | learning rate: 6.755E-05 | global batch size: 256 | lm loss: 3.086414E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 842.200 | TFLOPs: 29.48 | +7: iteration 14230/ 21553 | consumed samples: 3642880 | consumed tokens: 7460618240 | elapsed time per iteration (s): 0.30 | learning rate: 6.744E-05 | global batch size: 256 | lm loss: 3.105284E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.571 | TFLOPs: 30.30 | +7: iteration 14240/ 21553 | consumed samples: 3645440 | consumed tokens: 7465861120 | elapsed time per iteration (s): 0.30 | learning rate: 6.732E-05 | global batch size: 256 | lm loss: 3.107011E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.363 | TFLOPs: 30.05 | +7: iteration 14250/ 21553 | consumed samples: 3648000 | consumed tokens: 7471104000 | elapsed time per iteration (s): 0.30 | learning rate: 6.720E-05 | global batch size: 256 | lm loss: 3.097025E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.430 | TFLOPs: 29.95 | +7: iteration 14260/ 21553 | consumed samples: 3650560 | consumed tokens: 7476346880 | elapsed time per iteration (s): 0.30 | learning rate: 6.709E-05 | global batch size: 256 | lm loss: 3.094978E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.982 | TFLOPs: 30.35 | +7: iteration 14270/ 21553 | consumed samples: 3653120 | consumed tokens: 7481589760 | elapsed time per iteration (s): 0.30 | learning rate: 6.697E-05 | global batch size: 256 | lm loss: 
3.101279E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.804 | TFLOPs: 30.13 | +7: iteration 14280/ 21553 | consumed samples: 3655680 | consumed tokens: 7486832640 | elapsed time per iteration (s): 0.30 | learning rate: 6.685E-05 | global batch size: 256 | lm loss: 3.084635E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.135 | TFLOPs: 30.25 | +7: iteration 14290/ 21553 | consumed samples: 3658240 | consumed tokens: 7492075520 | elapsed time per iteration (s): 0.30 | learning rate: 6.674E-05 | global batch size: 256 | lm loss: 3.080313E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.887 | TFLOPs: 29.96 | +7: iteration 14300/ 21553 | consumed samples: 3660800 | consumed tokens: 7497318400 | elapsed time per iteration (s): 0.30 | learning rate: 6.662E-05 | global batch size: 256 | lm loss: 3.089553E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.223 | TFLOPs: 30.04 | +7: iteration 14310/ 21553 | consumed samples: 3663360 | consumed tokens: 7502561280 | elapsed time per iteration (s): 0.30 | learning rate: 6.651E-05 | global batch size: 256 | lm loss: 3.077593E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.746 | TFLOPs: 29.54 | +7: iteration 14320/ 21553 | consumed samples: 3665920 | consumed tokens: 7507804160 | elapsed time per iteration (s): 0.30 | learning rate: 6.639E-05 | global batch size: 256 | lm loss: 3.084528E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.397 | TFLOPs: 30.33 | +7: iteration 14330/ 21553 | consumed samples: 3668480 | consumed tokens: 7513047040 | elapsed 
time per iteration (s): 0.30 | learning rate: 6.627E-05 | global batch size: 256 | lm loss: 3.098389E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.843 | TFLOPs: 29.44 | +7: iteration 14340/ 21553 | consumed samples: 3671040 | consumed tokens: 7518289920 | elapsed time per iteration (s): 0.30 | learning rate: 6.616E-05 | global batch size: 256 | lm loss: 3.085647E+00 | grad norm: 0.480 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.822 | TFLOPs: 30.03 | +7: iteration 14350/ 21553 | consumed samples: 3673600 | consumed tokens: 7523532800 | elapsed time per iteration (s): 0.30 | learning rate: 6.604E-05 | global batch size: 256 | lm loss: 3.096942E+00 | grad norm: 0.468 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.090 | TFLOPs: 29.97 | +7: iteration 14360/ 21553 | consumed samples: 3676160 | consumed tokens: 7528775680 | elapsed time per iteration (s): 0.30 | learning rate: 6.593E-05 | global batch size: 256 | lm loss: 3.083067E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.415 | TFLOPs: 30.33 | +7: iteration 14370/ 21553 | consumed samples: 3678720 | consumed tokens: 7534018560 | elapsed time per iteration (s): 0.30 | learning rate: 6.581E-05 | global batch size: 256 | lm loss: 3.087398E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.510 | TFLOPs: 30.12 | +7: iteration 14380/ 21553 | consumed samples: 3681280 | consumed tokens: 7539261440 | elapsed time per iteration (s): 0.31 | learning rate: 6.570E-05 | global batch size: 256 | lm loss: 3.090314E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 829.339 | TFLOPs: 29.03 | +7: 
iteration 14390/ 21553 | consumed samples: 3683840 | consumed tokens: 7544504320 | elapsed time per iteration (s): 0.30 | learning rate: 6.558E-05 | global batch size: 256 | lm loss: 3.092139E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.593 | TFLOPs: 30.16 | +7: iteration 14400/ 21553 | consumed samples: 3686400 | consumed tokens: 7549747200 | elapsed time per iteration (s): 0.30 | learning rate: 6.547E-05 | global batch size: 256 | lm loss: 3.073827E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.710 | TFLOPs: 30.24 | +7: iteration 14410/ 21553 | consumed samples: 3688960 | consumed tokens: 7554990080 | elapsed time per iteration (s): 0.30 | learning rate: 6.535E-05 | global batch size: 256 | lm loss: 3.099426E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 843.974 | TFLOPs: 29.55 | +7: iteration 14420/ 21553 | consumed samples: 3691520 | consumed tokens: 7560232960 | elapsed time per iteration (s): 0.30 | learning rate: 6.524E-05 | global batch size: 256 | lm loss: 3.089035E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.962 | TFLOPs: 30.31 | +7: iteration 14430/ 21553 | consumed samples: 3694080 | consumed tokens: 7565475840 | elapsed time per iteration (s): 0.30 | learning rate: 6.512E-05 | global batch size: 256 | lm loss: 3.072975E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.495 | TFLOPs: 29.98 | +7: iteration 14440/ 21553 | consumed samples: 3696640 | consumed tokens: 7570718720 | elapsed time per iteration (s): 0.30 | learning rate: 6.501E-05 | global batch size: 256 | lm loss: 3.096792E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 846.626 | TFLOPs: 29.64 | +7: iteration 14450/ 21553 | consumed samples: 3699200 | consumed tokens: 7575961600 | elapsed time per iteration (s): 0.30 | learning rate: 6.489E-05 | global batch size: 256 | lm loss: 3.086423E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.830 | TFLOPs: 29.75 | +7: iteration 14460/ 21553 | consumed samples: 3701760 | consumed tokens: 7581204480 | elapsed time per iteration (s): 0.30 | learning rate: 6.478E-05 | global batch size: 256 | lm loss: 3.090782E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.140 | TFLOPs: 30.29 | +7: iteration 14470/ 21553 | consumed samples: 3704320 | consumed tokens: 7586447360 | elapsed time per iteration (s): 0.30 | learning rate: 6.466E-05 | global batch size: 256 | lm loss: 3.086109E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.825 | TFLOPs: 30.31 | +7: iteration 14480/ 21553 | consumed samples: 3706880 | consumed tokens: 7591690240 | elapsed time per iteration (s): 0.30 | learning rate: 6.455E-05 | global batch size: 256 | lm loss: 3.085490E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 841.390 | TFLOPs: 29.45 | +7: iteration 14490/ 21553 | consumed samples: 3709440 | consumed tokens: 7596933120 | elapsed time per iteration (s): 0.30 | learning rate: 6.443E-05 | global batch size: 256 | lm loss: 3.101259E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.217 | TFLOPs: 30.29 | +7: iteration 14500/ 21553 | consumed samples: 3712000 | consumed tokens: 7602176000 | elapsed time per iteration (s): 0.30 | learning rate: 6.432E-05 | global batch 
size: 256 | lm loss: 3.088268E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.645 | TFLOPs: 30.30 | +7: iteration 14510/ 21553 | consumed samples: 3714560 | consumed tokens: 7607418880 | elapsed time per iteration (s): 0.30 | learning rate: 6.421E-05 | global batch size: 256 | lm loss: 3.085465E+00 | grad norm: 0.456 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.512 | TFLOPs: 30.30 | +7: iteration 14520/ 21553 | consumed samples: 3717120 | consumed tokens: 7612661760 | elapsed time per iteration (s): 0.30 | learning rate: 6.409E-05 | global batch size: 256 | lm loss: 3.088915E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.412 | TFLOPs: 30.23 | +7: iteration 14530/ 21553 | consumed samples: 3719680 | consumed tokens: 7617904640 | elapsed time per iteration (s): 0.30 | learning rate: 6.398E-05 | global batch size: 256 | lm loss: 3.080154E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.637 | TFLOPs: 29.99 | +7: iteration 14540/ 21553 | consumed samples: 3722240 | consumed tokens: 7623147520 | elapsed time per iteration (s): 0.30 | learning rate: 6.386E-05 | global batch size: 256 | lm loss: 3.091740E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.810 | TFLOPs: 30.13 | +7: iteration 14550/ 21553 | consumed samples: 3724800 | consumed tokens: 7628390400 | elapsed time per iteration (s): 0.30 | learning rate: 6.375E-05 | global batch size: 256 | lm loss: 3.083852E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.014 | TFLOPs: 30.28 | +7: iteration 14560/ 21553 | consumed samples: 3727360 | consumed tokens: 
7633633280 | elapsed time per iteration (s): 0.30 | learning rate: 6.364E-05 | global batch size: 256 | lm loss: 3.080242E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.217 | TFLOPs: 30.32 | +7: iteration 14570/ 21553 | consumed samples: 3729920 | consumed tokens: 7638876160 | elapsed time per iteration (s): 0.30 | learning rate: 6.352E-05 | global batch size: 256 | lm loss: 3.085669E+00 | grad norm: 0.456 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.596 | TFLOPs: 30.30 | +7: iteration 14580/ 21553 | consumed samples: 3732480 | consumed tokens: 7644119040 | elapsed time per iteration (s): 0.30 | learning rate: 6.341E-05 | global batch size: 256 | lm loss: 3.084999E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.546 | TFLOPs: 30.27 | +7: iteration 14590/ 21553 | consumed samples: 3735040 | consumed tokens: 7649361920 | elapsed time per iteration (s): 0.30 | learning rate: 6.330E-05 | global batch size: 256 | lm loss: 3.078883E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.135 | TFLOPs: 30.18 | +7: iteration 14600/ 21553 | consumed samples: 3737600 | consumed tokens: 7654604800 | elapsed time per iteration (s): 0.30 | learning rate: 6.318E-05 | global batch size: 256 | lm loss: 3.076632E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.223 | TFLOPs: 30.25 | +7: iteration 14610/ 21553 | consumed samples: 3740160 | consumed tokens: 7659847680 | elapsed time per iteration (s): 0.30 | learning rate: 6.307E-05 | global batch size: 256 | lm loss: 3.077663E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.188 | 
TFLOPs: 30.15 | +7: iteration 14620/ 21553 | consumed samples: 3742720 | consumed tokens: 7665090560 | elapsed time per iteration (s): 0.30 | learning rate: 6.296E-05 | global batch size: 256 | lm loss: 3.085200E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.878 | TFLOPs: 30.28 | +7: iteration 14630/ 21553 | consumed samples: 3745280 | consumed tokens: 7670333440 | elapsed time per iteration (s): 0.30 | learning rate: 6.284E-05 | global batch size: 256 | lm loss: 3.094917E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.046 | TFLOPs: 30.28 | +7: iteration 14640/ 21553 | consumed samples: 3747840 | consumed tokens: 7675576320 | elapsed time per iteration (s): 0.30 | learning rate: 6.273E-05 | global batch size: 256 | lm loss: 3.077663E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.469 | TFLOPs: 29.70 | +7: iteration 14650/ 21553 | consumed samples: 3750400 | consumed tokens: 7680819200 | elapsed time per iteration (s): 0.30 | learning rate: 6.262E-05 | global batch size: 256 | lm loss: 3.093730E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.070 | TFLOPs: 30.14 | +7: iteration 14660/ 21553 | consumed samples: 3752960 | consumed tokens: 7686062080 | elapsed time per iteration (s): 0.30 | learning rate: 6.251E-05 | global batch size: 256 | lm loss: 3.072107E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.164 | TFLOPs: 30.08 | +7: iteration 14670/ 21553 | consumed samples: 3755520 | consumed tokens: 7691304960 | elapsed time per iteration (s): 0.30 | learning rate: 6.239E-05 | global batch size: 256 | lm loss: 3.087226E+00 | grad norm: 0.427 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.226 | TFLOPs: 30.22 | +7: iteration 14680/ 21553 | consumed samples: 3758080 | consumed tokens: 7696547840 | elapsed time per iteration (s): 0.30 | learning rate: 6.228E-05 | global batch size: 256 | lm loss: 3.087386E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.113 | TFLOPs: 30.29 | +7: iteration 14690/ 21553 | consumed samples: 3760640 | consumed tokens: 7701790720 | elapsed time per iteration (s): 0.30 | learning rate: 6.217E-05 | global batch size: 256 | lm loss: 3.076454E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 849.327 | TFLOPs: 29.73 | +7: iteration 14700/ 21553 | consumed samples: 3763200 | consumed tokens: 7707033600 | elapsed time per iteration (s): 0.30 | learning rate: 6.206E-05 | global batch size: 256 | lm loss: 3.074301E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.691 | TFLOPs: 29.99 | +7: iteration 14710/ 21553 | consumed samples: 3765760 | consumed tokens: 7712276480 | elapsed time per iteration (s): 0.30 | learning rate: 6.194E-05 | global batch size: 256 | lm loss: 3.069802E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.319 | TFLOPs: 30.01 | +7: iteration 14720/ 21553 | consumed samples: 3768320 | consumed tokens: 7717519360 | elapsed time per iteration (s): 0.30 | learning rate: 6.183E-05 | global batch size: 256 | lm loss: 3.083891E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.037 | TFLOPs: 30.28 | +7: iteration 14730/ 21553 | consumed samples: 3770880 | consumed tokens: 7722762240 | elapsed time per iteration (s): 0.31 | learning rate: 
6.172E-05 | global batch size: 256 | lm loss: 3.079211E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 835.015 | TFLOPs: 29.23 | +7: iteration 14740/ 21553 | consumed samples: 3773440 | consumed tokens: 7728005120 | elapsed time per iteration (s): 0.30 | learning rate: 6.161E-05 | global batch size: 256 | lm loss: 3.083495E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.901 | TFLOPs: 30.31 | +7: iteration 14750/ 21553 | consumed samples: 3776000 | consumed tokens: 7733248000 | elapsed time per iteration (s): 0.30 | learning rate: 6.150E-05 | global batch size: 256 | lm loss: 3.087070E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.046 | TFLOPs: 30.25 | +7: iteration 14760/ 21553 | consumed samples: 3778560 | consumed tokens: 7738490880 | elapsed time per iteration (s): 0.30 | learning rate: 6.139E-05 | global batch size: 256 | lm loss: 3.076380E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.372 | TFLOPs: 30.01 | +7: iteration 14770/ 21553 | consumed samples: 3781120 | consumed tokens: 7743733760 | elapsed time per iteration (s): 0.30 | learning rate: 6.127E-05 | global batch size: 256 | lm loss: 3.084877E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.773 | TFLOPs: 30.27 | +7: iteration 14780/ 21553 | consumed samples: 3783680 | consumed tokens: 7748976640 | elapsed time per iteration (s): 0.30 | learning rate: 6.116E-05 | global batch size: 256 | lm loss: 3.077991E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.369 | TFLOPs: 30.26 | +7: iteration 14790/ 21553 | consumed samples: 
3786240 | consumed tokens: 7754219520 | elapsed time per iteration (s): 0.30 | learning rate: 6.105E-05 | global batch size: 256 | lm loss: 3.081799E+00 | grad norm: 0.412 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.505 | TFLOPs: 30.26 | +7: iteration 14800/ 21553 | consumed samples: 3788800 | consumed tokens: 7759462400 | elapsed time per iteration (s): 0.30 | learning rate: 6.094E-05 | global batch size: 256 | lm loss: 3.088738E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 852.740 | TFLOPs: 29.85 | +7: iteration 14810/ 21553 | consumed samples: 3791360 | consumed tokens: 7764705280 | elapsed time per iteration (s): 0.31 | learning rate: 6.083E-05 | global batch size: 256 | lm loss: 3.082780E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 837.923 | TFLOPs: 29.33 | +7: iteration 14820/ 21553 | consumed samples: 3793920 | consumed tokens: 7769948160 | elapsed time per iteration (s): 0.30 | learning rate: 6.072E-05 | global batch size: 256 | lm loss: 3.080658E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.162 | TFLOPs: 30.22 | +7: iteration 14830/ 21553 | consumed samples: 3796480 | consumed tokens: 7775191040 | elapsed time per iteration (s): 0.30 | learning rate: 6.061E-05 | global batch size: 256 | lm loss: 3.087332E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.109 | TFLOPs: 30.18 | +7: iteration 14840/ 21553 | consumed samples: 3799040 | consumed tokens: 7780433920 | elapsed time per iteration (s): 0.30 | learning rate: 6.050E-05 | global batch size: 256 | lm loss: 3.088243E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | 
samples per second: 859.662 | TFLOPs: 30.09 | +7: iteration 14850/ 21553 | consumed samples: 3801600 | consumed tokens: 7785676800 | elapsed time per iteration (s): 0.30 | learning rate: 6.039E-05 | global batch size: 256 | lm loss: 3.070465E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.163 | TFLOPs: 30.22 | +7: iteration 14860/ 21553 | consumed samples: 3804160 | consumed tokens: 7790919680 | elapsed time per iteration (s): 0.30 | learning rate: 6.028E-05 | global batch size: 256 | lm loss: 3.075129E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.059 | TFLOPs: 30.21 | +7: iteration 14870/ 21553 | consumed samples: 3806720 | consumed tokens: 7796162560 | elapsed time per iteration (s): 0.30 | learning rate: 6.016E-05 | global batch size: 256 | lm loss: 3.070767E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.462 | TFLOPs: 30.26 | +7: iteration 14880/ 21553 | consumed samples: 3809280 | consumed tokens: 7801405440 | elapsed time per iteration (s): 0.30 | learning rate: 6.005E-05 | global batch size: 256 | lm loss: 3.077379E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.085 | TFLOPs: 30.28 | +7: iteration 14890/ 21553 | consumed samples: 3811840 | consumed tokens: 7806648320 | elapsed time per iteration (s): 0.30 | learning rate: 5.994E-05 | global batch size: 256 | lm loss: 3.078517E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.848 | TFLOPs: 30.03 | +7: iteration 14900/ 21553 | consumed samples: 3814400 | consumed tokens: 7811891200 | elapsed time per iteration (s): 0.30 | learning rate: 5.983E-05 | global batch size: 256 | lm loss: 3.093475E+00 | grad norm: 
0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.913 | TFLOPs: 30.28 | +7: iteration 14910/ 21553 | consumed samples: 3816960 | consumed tokens: 7817134080 | elapsed time per iteration (s): 0.30 | learning rate: 5.972E-05 | global batch size: 256 | lm loss: 3.084463E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.155 | TFLOPs: 30.29 | +7: iteration 14920/ 21553 | consumed samples: 3819520 | consumed tokens: 7822376960 | elapsed time per iteration (s): 0.30 | learning rate: 5.961E-05 | global batch size: 256 | lm loss: 3.080169E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.970 | TFLOPs: 30.28 | +7: iteration 14930/ 21553 | consumed samples: 3822080 | consumed tokens: 7827619840 | elapsed time per iteration (s): 0.30 | learning rate: 5.950E-05 | global batch size: 256 | lm loss: 3.088979E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.124 | TFLOPs: 30.29 | +7: iteration 14940/ 21553 | consumed samples: 3824640 | consumed tokens: 7832862720 | elapsed time per iteration (s): 0.30 | learning rate: 5.940E-05 | global batch size: 256 | lm loss: 3.070983E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.441 | TFLOPs: 29.98 | +7: iteration 14950/ 21553 | consumed samples: 3827200 | consumed tokens: 7838105600 | elapsed time per iteration (s): 0.30 | learning rate: 5.929E-05 | global batch size: 256 | lm loss: 3.080212E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.353 | TFLOPs: 30.26 | +7: iteration 14960/ 21553 | consumed samples: 3829760 | consumed tokens: 7843348480 | elapsed time per iteration (s): 0.30 
| learning rate: 5.918E-05 | global batch size: 256 | lm loss: 3.079920E+00 | grad norm: 0.460 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.193 | TFLOPs: 30.15 | +7: iteration 14970/ 21553 | consumed samples: 3832320 | consumed tokens: 7848591360 | elapsed time per iteration (s): 0.30 | learning rate: 5.907E-05 | global batch size: 256 | lm loss: 3.069069E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.789 | TFLOPs: 30.27 | +7: iteration 14980/ 21553 | consumed samples: 3834880 | consumed tokens: 7853834240 | elapsed time per iteration (s): 0.30 | learning rate: 5.896E-05 | global batch size: 256 | lm loss: 3.080701E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.429 | TFLOPs: 30.26 | +7: iteration 14990/ 21553 | consumed samples: 3837440 | consumed tokens: 7859077120 | elapsed time per iteration (s): 0.30 | learning rate: 5.885E-05 | global batch size: 256 | lm loss: 3.078204E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.878 | TFLOPs: 30.28 | +7: iteration 15000/ 21553 | consumed samples: 3840000 | consumed tokens: 7864320000 | elapsed time per iteration (s): 0.30 | learning rate: 5.874E-05 | global batch size: 256 | lm loss: 3.074896E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.231 | TFLOPs: 30.25 | +7: ------------------------------------------------------------------------------------------------ +7: validation loss at iteration 15000 | lm loss value: 3.787746E+00 | lm loss PPL: 4.415678E+01 | +7: ------------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 15000 to checkpoints_146m14b100m +0: 
[2023-03-14 00:32:59,808] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step15000 is begin to save! +0: [2023-03-14 00:32:59,812] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_01-model_00-model_states.pt... +0: [2023-03-14 00:32:59,901] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_01-model_00-model_states.pt. +0: [2023-03-14 00:32:59,901] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_03-model_00-model_states.pt... +0: [2023-03-14 00:32:59,917] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_03-model_00-model_states.pt. +0: [2023-03-14 00:32:59,917] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_04-model_00-model_states.pt... +0: [2023-03-14 00:32:59,932] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_04-model_00-model_states.pt. +0: [2023-03-14 00:32:59,932] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_05-model_00-model_states.pt... +0: [2023-03-14 00:32:59,947] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_05-model_00-model_states.pt. +0: [2023-03-14 00:32:59,947] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_06-model_00-model_states.pt... +0: [2023-03-14 00:32:59,962] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_06-model_00-model_states.pt. +0: [2023-03-14 00:32:59,962] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_07-model_00-model_states.pt... 
+0: [2023-03-14 00:32:59,977] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_07-model_00-model_states.pt. +0: [2023-03-14 00:32:59,978] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_08-model_00-model_states.pt... +0: [2023-03-14 00:32:59,993] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_08-model_00-model_states.pt. +0: [2023-03-14 00:32:59,993] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_09-model_00-model_states.pt... +0: [2023-03-14 00:33:00,008] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_09-model_00-model_states.pt. +0: [2023-03-14 00:33:00,008] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_10-model_00-model_states.pt... +0: [2023-03-14 00:33:00,023] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_10-model_00-model_states.pt. +0: [2023-03-14 00:33:00,023] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_11-model_00-model_states.pt... +0: [2023-03-14 00:33:00,038] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_11-model_00-model_states.pt. +0: [2023-03-14 00:33:00,038] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_12-model_00-model_states.pt... +0: [2023-03-14 00:33:00,053] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_12-model_00-model_states.pt. +0: [2023-03-14 00:33:00,053] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_13-model_00-model_states.pt... 
+0: [2023-03-14 00:33:00,068] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_13-model_00-model_states.pt. +0: [2023-03-14 00:33:00,069] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_14-model_00-model_states.pt... +0: [2023-03-14 00:33:00,083] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_14-model_00-model_states.pt. +0: [2023-03-14 00:33:00,084] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_15-model_00-model_states.pt... +0: [2023-03-14 00:33:00,099] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_15-model_00-model_states.pt. +0: [2023-03-14 00:33:00,099] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_16-model_00-model_states.pt... +0: [2023-03-14 00:33:00,114] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_16-model_00-model_states.pt. +0: [2023-03-14 00:33:00,114] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_17-model_00-model_states.pt... +0: [2023-03-14 00:33:00,129] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_17-model_00-model_states.pt. +0: [2023-03-14 00:33:00,129] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/layer_19-model_00-model_states.pt... +0: [2023-03-14 00:33:00,130] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/layer_19-model_00-model_states.pt. 
+0: [2023-03-14 00:33:00,131] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step15000/mp_rank_00_model_states.pt +0: [2023-03-14 00:33:00,131] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/mp_rank_00_model_states.pt... +0: [2023-03-14 00:33:00,135] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/mp_rank_00_model_states.pt. +0: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... 
+4: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... 
+3: [2023-03-14 00:33:00,151] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:33:00,185] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:33:00,185] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2023-03-14 00:33:00,185] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2023-03-14 00:33:00,187] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:33:00,187] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2023-03-14 00:33:00,187] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2023-03-14 00:33:00,188] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:33:00,188] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-14 00:33:00,188] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2023-03-14 00:33:00,189] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:33:00,189] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2023-03-14 00:33:00,189] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2023-03-14 00:33:00,189] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:33:00,189] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-14 00:33:00,189] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2023-03-14 00:33:00,190] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:33:00,190] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:33:00,190] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 00:33:00,190] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-14 00:33:00,190] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2023-03-14 00:33:00,190] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2023-03-14 00:33:00,190] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:33:00,190] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-14 00:33:00,190] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2023-03-14 00:33:00,190] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:33:00,190] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:33:00,191] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 00:33:00,191] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2023-03-14 00:33:00,191] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:33:00,191] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-14 00:33:00,191] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2023-03-14 00:33:00,192] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:33:00,192] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 00:33:00,192] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! 
+7: [2023-03-14 00:33:00,192] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:33:00,192] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-14 00:33:00,192] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2023-03-14 00:33:00,192] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:33:00,192] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-14 00:33:00,192] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2023-03-14 00:33:00,193] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:33:00,193] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 00:33:00,193] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2023-03-14 00:33:00,193] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:33:00,193] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-14 00:33:00,193] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:33:00,193] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2023-03-14 00:33:00,193] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 00:33:00,193] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2023-03-14 00:33:00,196] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:33:00,196] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:33:00,196] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:33:00,196] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 00:33:00,196] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 00:33:00,196] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-14 00:33:00,196] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2023-03-14 00:33:00,196] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2023-03-14 00:33:00,196] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +7: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:33:00,200] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +2: [2023-03-14 00:33:00,193] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:33:00,193] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:33:00,193] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:33:00,200] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-14 00:33:00,200] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +2: [2023-03-14 00:33:00,198] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 00:33:00,200] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-14 00:33:00,200] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-14 00:33:00,200] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2023-03-14 00:33:00,200] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-14 00:33:00,200] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +2: [2023-03-14 00:33:00,198] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3: [2023-03-14 00:33:00,200] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +2: [2023-03-14 00:33:00,198] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! 
+3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +3: [2023-03-14 00:33:00,200] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2023-03-14 00:33:00,204] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:33:00,204] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 00:33:00,204] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:33:00,204] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2023-03-14 00:33:00,204] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-14 00:33:00,204] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2023-03-14 00:33:00,204] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:33:00,204] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-14 00:33:00,204] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! 
+1: [2023-03-14 00:33:00,204] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:33:00,204] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 00:33:00,204] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +0: [2023-03-14 00:33:00,205] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:33:00,205] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 00:33:00,205] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2023-03-14 00:33:00,211] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:33:00,211] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 00:33:00,211] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2023-03-14 00:33:00,211] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:33:00,211] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 00:33:00,211] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! 
+1: [2023-03-14 00:33:00,211] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:33:00,211] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 00:33:00,211] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +1: [2023-03-14 00:33:00,212] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:33:00,212] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-14 00:33:00,212] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:33:00,218] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 00:33:00,218] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-14 00:33:00,218] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +0: [2023-03-14 00:33:00,218] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +6: [2023-03-14 00:33:00,218] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 00:33:00,218] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 00:33:00,218] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +0: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready 
now! +6: [2023-03-14 00:33:00,218] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 00:33:00,218] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +6: [2023-03-14 00:33:00,218] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:33:00,219] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-14 00:33:00,219] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-14 00:33:00,219] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-14 00:33:00,219] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-14 00:33:00,219] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-14 00:33:00,219] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved 
checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-14 00:33:00,219] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-14 00:33:00,219] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +4: [2023-03-14 00:33:00,219] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2023-03-14 00:33:00,225] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:33:00,225] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:33:00,225] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:33:00,225] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:33:00,225] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:33:00,225] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:33:00,225] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:33:00,225] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:33:00,226] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-14 00:33:00,226] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 00:33:00,226] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-14 00:33:00,226] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 00:33:00,226] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! 
+5: [2023-03-14 00:33:00,226] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-14 00:33:00,226] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2023-03-14 00:33:00,226] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-14 00:33:00,226] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2023-03-14 00:33:00,226] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2023-03-14 00:33:00,226] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 00:33:00,226] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step15000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 00:33:00,226] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2023-03-14 00:33:00,226] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2023-03-14 00:33:00,226] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! +5: [2023-03-14 00:33:00,226] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step15000 is ready now! 
+0: successfully saved checkpoint at iteration 15000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 423.60 +7: iteration 15010/ 21553 | consumed samples: 3842560 | consumed tokens: 7869562880 | elapsed time per iteration (s): 0.35 | learning rate: 5.863E-05 | global batch size: 256 | lm loss: 3.075193E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 736.817 | TFLOPs: 25.79 | +7: iteration 15020/ 21553 | consumed samples: 3845120 | consumed tokens: 7874805760 | elapsed time per iteration (s): 0.30 | learning rate: 5.852E-05 | global batch size: 256 | lm loss: 3.078754E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.946 | TFLOPs: 30.28 | +7: iteration 15030/ 21553 | consumed samples: 3847680 | consumed tokens: 7880048640 | elapsed time per iteration (s): 0.30 | learning rate: 5.841E-05 | global batch size: 256 | lm loss: 3.075450E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.354 | TFLOPs: 30.26 | +7: iteration 15040/ 21553 | consumed samples: 3850240 | consumed tokens: 7885291520 | elapsed time per iteration (s): 0.30 | learning rate: 5.830E-05 | global batch size: 256 | lm loss: 3.072785E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.006 | TFLOPs: 30.28 | +7: iteration 15050/ 21553 | consumed samples: 3852800 | consumed tokens: 7890534400 | elapsed time per iteration (s): 0.30 | learning rate: 5.820E-05 | global batch size: 256 | lm loss: 3.070582E+00 | grad norm: 0.470 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.706 | TFLOPs: 30.27 | +7: iteration 15060/ 21553 | consumed samples: 3855360 | consumed tokens: 7895777280 | elapsed time per iteration (s): 0.30 | learning 
rate: 5.809E-05 | global batch size: 256 | lm loss: 3.075571E+00 | grad norm: 0.462 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.491 | TFLOPs: 30.26 | +7: iteration 15070/ 21553 | consumed samples: 3857920 | consumed tokens: 7901020160 | elapsed time per iteration (s): 0.30 | learning rate: 5.798E-05 | global batch size: 256 | lm loss: 3.090485E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.582 | TFLOPs: 30.27 | +7: iteration 15080/ 21553 | consumed samples: 3860480 | consumed tokens: 7906263040 | elapsed time per iteration (s): 0.30 | learning rate: 5.787E-05 | global batch size: 256 | lm loss: 3.071949E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.406 | TFLOPs: 30.26 | +7: iteration 15090/ 21553 | consumed samples: 3863040 | consumed tokens: 7911505920 | elapsed time per iteration (s): 0.30 | learning rate: 5.776E-05 | global batch size: 256 | lm loss: 3.067277E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.547 | TFLOPs: 30.30 | +7: iteration 15100/ 21553 | consumed samples: 3865600 | consumed tokens: 7916748800 | elapsed time per iteration (s): 0.30 | learning rate: 5.766E-05 | global batch size: 256 | lm loss: 3.077604E+00 | grad norm: 0.471 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.175 | TFLOPs: 30.29 | +7: iteration 15110/ 21553 | consumed samples: 3868160 | consumed tokens: 7921991680 | elapsed time per iteration (s): 0.30 | learning rate: 5.755E-05 | global batch size: 256 | lm loss: 3.072345E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.180 | TFLOPs: 30.18 | +7: iteration 15120/ 21553 | consumed samples: 
3870720 | consumed tokens: 7927234560 | elapsed time per iteration (s): 0.30 | learning rate: 5.744E-05 | global batch size: 256 | lm loss: 3.068963E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.569 | TFLOPs: 30.27 | +7: iteration 15130/ 21553 | consumed samples: 3873280 | consumed tokens: 7932477440 | elapsed time per iteration (s): 0.30 | learning rate: 5.733E-05 | global batch size: 256 | lm loss: 3.072025E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.090 | TFLOPs: 30.28 | +7: iteration 15140/ 21553 | consumed samples: 3875840 | consumed tokens: 7937720320 | elapsed time per iteration (s): 0.30 | learning rate: 5.723E-05 | global batch size: 256 | lm loss: 3.070629E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.726 | TFLOPs: 30.27 | +7: iteration 15150/ 21553 | consumed samples: 3878400 | consumed tokens: 7942963200 | elapsed time per iteration (s): 0.30 | learning rate: 5.712E-05 | global batch size: 256 | lm loss: 3.063295E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.235 | TFLOPs: 30.29 | +7: iteration 15160/ 21553 | consumed samples: 3880960 | consumed tokens: 7948206080 | elapsed time per iteration (s): 0.30 | learning rate: 5.701E-05 | global batch size: 256 | lm loss: 3.071215E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.381 | TFLOPs: 30.26 | +7: iteration 15170/ 21553 | consumed samples: 3883520 | consumed tokens: 7953448960 | elapsed time per iteration (s): 0.30 | learning rate: 5.690E-05 | global batch size: 256 | lm loss: 3.078580E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | 
samples per second: 864.298 | TFLOPs: 30.26 | +7: iteration 15180/ 21553 | consumed samples: 3886080 | consumed tokens: 7958691840 | elapsed time per iteration (s): 0.30 | learning rate: 5.680E-05 | global batch size: 256 | lm loss: 3.074716E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.701 | TFLOPs: 30.27 | +7: iteration 15190/ 21553 | consumed samples: 3888640 | consumed tokens: 7963934720 | elapsed time per iteration (s): 0.30 | learning rate: 5.669E-05 | global batch size: 256 | lm loss: 3.074091E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.422 | TFLOPs: 30.26 | +7: iteration 15200/ 21553 | consumed samples: 3891200 | consumed tokens: 7969177600 | elapsed time per iteration (s): 0.30 | learning rate: 5.658E-05 | global batch size: 256 | lm loss: 3.078027E+00 | grad norm: 0.462 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.575 | TFLOPs: 30.27 | +7: iteration 15210/ 21553 | consumed samples: 3893760 | consumed tokens: 7974420480 | elapsed time per iteration (s): 0.30 | learning rate: 5.648E-05 | global batch size: 256 | lm loss: 3.069056E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.128 | TFLOPs: 30.25 | +7: iteration 15220/ 21553 | consumed samples: 3896320 | consumed tokens: 7979663360 | elapsed time per iteration (s): 0.30 | learning rate: 5.637E-05 | global batch size: 256 | lm loss: 3.079103E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.752 | TFLOPs: 30.27 | +7: iteration 15230/ 21553 | consumed samples: 3898880 | consumed tokens: 7984906240 | elapsed time per iteration (s): 0.30 | learning rate: 5.626E-05 | global batch size: 256 | lm loss: 3.075753E+00 | grad norm: 
0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.885 | TFLOPs: 30.31 | +7: iteration 15240/ 21553 | consumed samples: 3901440 | consumed tokens: 7990149120 | elapsed time per iteration (s): 0.30 | learning rate: 5.616E-05 | global batch size: 256 | lm loss: 3.076041E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.024 | TFLOPs: 30.28 | +7: iteration 15250/ 21553 | consumed samples: 3904000 | consumed tokens: 7995392000 | elapsed time per iteration (s): 0.30 | learning rate: 5.605E-05 | global batch size: 256 | lm loss: 3.068985E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.278 | TFLOPs: 30.26 | +7: iteration 15260/ 21553 | consumed samples: 3906560 | consumed tokens: 8000634880 | elapsed time per iteration (s): 0.30 | learning rate: 5.595E-05 | global batch size: 256 | lm loss: 3.062220E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 850.722 | TFLOPs: 29.78 | +7: iteration 15270/ 21553 | consumed samples: 3909120 | consumed tokens: 8005877760 | elapsed time per iteration (s): 0.30 | learning rate: 5.584E-05 | global batch size: 256 | lm loss: 3.072858E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.476 | TFLOPs: 30.30 | +7: iteration 15280/ 21553 | consumed samples: 3911680 | consumed tokens: 8011120640 | elapsed time per iteration (s): 0.30 | learning rate: 5.573E-05 | global batch size: 256 | lm loss: 3.073962E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.330 | TFLOPs: 30.29 | +7: iteration 15290/ 21553 | consumed samples: 3914240 | consumed tokens: 8016363520 | elapsed time per iteration (s): 0.30 
| learning rate: 5.563E-05 | global batch size: 256 | lm loss: 3.063772E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.861 | TFLOPs: 30.31 | +7: iteration 15300/ 21553 | consumed samples: 3916800 | consumed tokens: 8021606400 | elapsed time per iteration (s): 0.30 | learning rate: 5.552E-05 | global batch size: 256 | lm loss: 3.056449E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.830 | TFLOPs: 30.28 | +7: iteration 15310/ 21553 | consumed samples: 3919360 | consumed tokens: 8026849280 | elapsed time per iteration (s): 0.30 | learning rate: 5.542E-05 | global batch size: 256 | lm loss: 3.068190E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.434 | TFLOPs: 30.30 | +7: iteration 15320/ 21553 | consumed samples: 3921920 | consumed tokens: 8032092160 | elapsed time per iteration (s): 0.30 | learning rate: 5.531E-05 | global batch size: 256 | lm loss: 3.066616E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.399 | TFLOPs: 30.30 | +7: iteration 15330/ 21553 | consumed samples: 3924480 | consumed tokens: 8037335040 | elapsed time per iteration (s): 0.30 | learning rate: 5.521E-05 | global batch size: 256 | lm loss: 3.068895E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.691 | TFLOPs: 30.34 | +7: iteration 15340/ 21553 | consumed samples: 3927040 | consumed tokens: 8042577920 | elapsed time per iteration (s): 0.30 | learning rate: 5.510E-05 | global batch size: 256 | lm loss: 3.075322E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.289 | TFLOPs: 30.33 | +7: iteration 15350/ 21553 | 
consumed samples: 3929600 | consumed tokens: 8047820800 | elapsed time per iteration (s): 0.30 | learning rate: 5.500E-05 | global batch size: 256 | lm loss: 3.087115E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.643 | TFLOPs: 30.30 | +7: iteration 15360/ 21553 | consumed samples: 3932160 | consumed tokens: 8053063680 | elapsed time per iteration (s): 0.30 | learning rate: 5.489E-05 | global batch size: 256 | lm loss: 3.063715E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.393 | TFLOPs: 30.30 | +7: iteration 15370/ 21553 | consumed samples: 3934720 | consumed tokens: 8058306560 | elapsed time per iteration (s): 0.30 | learning rate: 5.479E-05 | global batch size: 256 | lm loss: 3.079202E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.480 | TFLOPs: 30.30 | +7: iteration 15380/ 21553 | consumed samples: 3937280 | consumed tokens: 8063549440 | elapsed time per iteration (s): 0.30 | learning rate: 5.468E-05 | global batch size: 256 | lm loss: 3.067935E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.587 | TFLOPs: 30.30 | +7: iteration 15390/ 21553 | consumed samples: 3939840 | consumed tokens: 8068792320 | elapsed time per iteration (s): 0.30 | learning rate: 5.458E-05 | global batch size: 256 | lm loss: 3.063771E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.714 | TFLOPs: 30.31 | +7: iteration 15400/ 21553 | consumed samples: 3942400 | consumed tokens: 8074035200 | elapsed time per iteration (s): 0.30 | learning rate: 5.447E-05 | global batch size: 256 | lm loss: 3.062153E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 865.232 | TFLOPs: 30.29 | +7: iteration 15410/ 21553 | consumed samples: 3944960 | consumed tokens: 8079278080 | elapsed time per iteration (s): 0.30 | learning rate: 5.437E-05 | global batch size: 256 | lm loss: 3.070164E+00 | grad norm: 0.454 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.946 | TFLOPs: 30.28 | +7: iteration 15420/ 21553 | consumed samples: 3947520 | consumed tokens: 8084520960 | elapsed time per iteration (s): 0.30 | learning rate: 5.427E-05 | global batch size: 256 | lm loss: 3.064224E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.567 | TFLOPs: 30.30 | +7: iteration 15430/ 21553 | consumed samples: 3950080 | consumed tokens: 8089763840 | elapsed time per iteration (s): 0.30 | learning rate: 5.416E-05 | global batch size: 256 | lm loss: 3.057762E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.045 | TFLOPs: 30.32 | +7: iteration 15440/ 21553 | consumed samples: 3952640 | consumed tokens: 8095006720 | elapsed time per iteration (s): 0.30 | learning rate: 5.406E-05 | global batch size: 256 | lm loss: 3.068598E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.634 | TFLOPs: 30.30 | +7: iteration 15450/ 21553 | consumed samples: 3955200 | consumed tokens: 8100249600 | elapsed time per iteration (s): 0.30 | learning rate: 5.395E-05 | global batch size: 256 | lm loss: 3.062481E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.361 | TFLOPs: 30.29 | +7: iteration 15460/ 21553 | consumed samples: 3957760 | consumed tokens: 8105492480 | elapsed time per iteration (s): 0.30 | learning rate: 5.385E-05 | global batch size: 256 | lm loss: 
3.059039E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.107 | TFLOPs: 30.29 | +7: iteration 15470/ 21553 | consumed samples: 3960320 | consumed tokens: 8110735360 | elapsed time per iteration (s): 0.30 | learning rate: 5.375E-05 | global batch size: 256 | lm loss: 3.064380E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.014 | TFLOPs: 30.25 | +7: iteration 15480/ 21553 | consumed samples: 3962880 | consumed tokens: 8115978240 | elapsed time per iteration (s): 0.30 | learning rate: 5.364E-05 | global batch size: 256 | lm loss: 3.069628E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.134 | TFLOPs: 30.25 | +7: iteration 15490/ 21553 | consumed samples: 3965440 | consumed tokens: 8121221120 | elapsed time per iteration (s): 0.30 | learning rate: 5.354E-05 | global batch size: 256 | lm loss: 3.051184E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.563 | TFLOPs: 30.27 | +7: iteration 15500/ 21553 | consumed samples: 3968000 | consumed tokens: 8126464000 | elapsed time per iteration (s): 0.30 | learning rate: 5.344E-05 | global batch size: 256 | lm loss: 3.068257E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.628 | TFLOPs: 29.92 | +7: iteration 15510/ 21553 | consumed samples: 3970560 | consumed tokens: 8131706880 | elapsed time per iteration (s): 0.30 | learning rate: 5.333E-05 | global batch size: 256 | lm loss: 3.065663E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.915 | TFLOPs: 30.28 | +7: iteration 15520/ 21553 | consumed samples: 3973120 | consumed tokens: 8136949760 | elapsed 
time per iteration (s): 0.30 | learning rate: 5.323E-05 | global batch size: 256 | lm loss: 3.078635E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.850 | TFLOPs: 30.28 | +7: iteration 15530/ 21553 | consumed samples: 3975680 | consumed tokens: 8142192640 | elapsed time per iteration (s): 0.30 | learning rate: 5.313E-05 | global batch size: 256 | lm loss: 3.056179E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.035 | TFLOPs: 30.28 | +7: iteration 15540/ 21553 | consumed samples: 3978240 | consumed tokens: 8147435520 | elapsed time per iteration (s): 0.30 | learning rate: 5.303E-05 | global batch size: 256 | lm loss: 3.082423E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.866 | TFLOPs: 30.28 | +7: iteration 15550/ 21553 | consumed samples: 3980800 | consumed tokens: 8152678400 | elapsed time per iteration (s): 0.30 | learning rate: 5.292E-05 | global batch size: 256 | lm loss: 3.058758E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.422 | TFLOPs: 30.30 | +7: iteration 15560/ 21553 | consumed samples: 3983360 | consumed tokens: 8157921280 | elapsed time per iteration (s): 0.30 | learning rate: 5.282E-05 | global batch size: 256 | lm loss: 3.061097E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.480 | TFLOPs: 30.19 | +7: iteration 15570/ 21553 | consumed samples: 3985920 | consumed tokens: 8163164160 | elapsed time per iteration (s): 0.30 | learning rate: 5.272E-05 | global batch size: 256 | lm loss: 3.054545E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.743 | TFLOPs: 30.27 | +7: 
iteration 15580/ 21553 | consumed samples: 3988480 | consumed tokens: 8168407040 | elapsed time per iteration (s): 0.30 | learning rate: 5.262E-05 | global batch size: 256 | lm loss: 3.071933E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.268 | TFLOPs: 30.29 | +7: iteration 15590/ 21553 | consumed samples: 3991040 | consumed tokens: 8173649920 | elapsed time per iteration (s): 0.30 | learning rate: 5.251E-05 | global batch size: 256 | lm loss: 3.081165E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.397 | TFLOPs: 30.30 | +7: iteration 15600/ 21553 | consumed samples: 3993600 | consumed tokens: 8178892800 | elapsed time per iteration (s): 0.30 | learning rate: 5.241E-05 | global batch size: 256 | lm loss: 3.069503E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.254 | TFLOPs: 30.29 | +7: iteration 15610/ 21553 | consumed samples: 3996160 | consumed tokens: 8184135680 | elapsed time per iteration (s): 0.30 | learning rate: 5.231E-05 | global batch size: 256 | lm loss: 3.068613E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.484 | TFLOPs: 30.30 | +7: iteration 15620/ 21553 | consumed samples: 3998720 | consumed tokens: 8189378560 | elapsed time per iteration (s): 0.30 | learning rate: 5.221E-05 | global batch size: 256 | lm loss: 3.053094E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.078 | TFLOPs: 30.28 | +7: iteration 15630/ 21553 | consumed samples: 4001280 | consumed tokens: 8194621440 | elapsed time per iteration (s): 0.30 | learning rate: 5.211E-05 | global batch size: 256 | lm loss: 3.065709E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 865.380 | TFLOPs: 30.29 | +7: iteration 15640/ 21553 | consumed samples: 4003840 | consumed tokens: 8199864320 | elapsed time per iteration (s): 0.30 | learning rate: 5.201E-05 | global batch size: 256 | lm loss: 3.065639E+00 | grad norm: 0.462 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.785 | TFLOPs: 30.31 | +7: iteration 15650/ 21553 | consumed samples: 4006400 | consumed tokens: 8205107200 | elapsed time per iteration (s): 0.30 | learning rate: 5.191E-05 | global batch size: 256 | lm loss: 3.071387E+00 | grad norm: 0.463 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.960 | TFLOPs: 30.31 | +7: iteration 15660/ 21553 | consumed samples: 4008960 | consumed tokens: 8210350080 | elapsed time per iteration (s): 0.30 | learning rate: 5.180E-05 | global batch size: 256 | lm loss: 3.077707E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.982 | TFLOPs: 30.28 | +7: iteration 15670/ 21553 | consumed samples: 4011520 | consumed tokens: 8215592960 | elapsed time per iteration (s): 0.30 | learning rate: 5.170E-05 | global batch size: 256 | lm loss: 3.068346E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.666 | TFLOPs: 30.30 | +7: iteration 15680/ 21553 | consumed samples: 4014080 | consumed tokens: 8220835840 | elapsed time per iteration (s): 0.30 | learning rate: 5.160E-05 | global batch size: 256 | lm loss: 3.052607E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.010 | TFLOPs: 30.32 | +7: iteration 15690/ 21553 | consumed samples: 4016640 | consumed tokens: 8226078720 | elapsed time per iteration (s): 0.30 | learning rate: 5.150E-05 | global batch 
size: 256 | lm loss: 3.061651E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.929 | TFLOPs: 30.31 | +7: iteration 15700/ 21553 | consumed samples: 4019200 | consumed tokens: 8231321600 | elapsed time per iteration (s): 0.30 | learning rate: 5.140E-05 | global batch size: 256 | lm loss: 3.057454E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.088 | TFLOPs: 30.32 | +7: iteration 15710/ 21553 | consumed samples: 4021760 | consumed tokens: 8236564480 | elapsed time per iteration (s): 0.30 | learning rate: 5.130E-05 | global batch size: 256 | lm loss: 3.067446E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.424 | TFLOPs: 30.30 | +7: iteration 15720/ 21553 | consumed samples: 4024320 | consumed tokens: 8241807360 | elapsed time per iteration (s): 0.30 | learning rate: 5.120E-05 | global batch size: 256 | lm loss: 3.062564E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.005 | TFLOPs: 30.32 | +7: iteration 15730/ 21553 | consumed samples: 4026880 | consumed tokens: 8247050240 | elapsed time per iteration (s): 0.30 | learning rate: 5.110E-05 | global batch size: 256 | lm loss: 3.061909E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.089 | TFLOPs: 30.32 | +7: iteration 15740/ 21553 | consumed samples: 4029440 | consumed tokens: 8252293120 | elapsed time per iteration (s): 0.30 | learning rate: 5.100E-05 | global batch size: 256 | lm loss: 3.062090E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.029 | TFLOPs: 29.97 | +7: iteration 15750/ 21553 | consumed samples: 4032000 | consumed tokens: 
8257536000 | elapsed time per iteration (s): 0.30 | learning rate: 5.090E-05 | global batch size: 256 | lm loss: 3.072898E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.522 | TFLOPs: 30.19 | +7: iteration 15760/ 21553 | consumed samples: 4034560 | consumed tokens: 8262778880 | elapsed time per iteration (s): 0.30 | learning rate: 5.080E-05 | global batch size: 256 | lm loss: 3.059922E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.375 | TFLOPs: 30.33 | +7: iteration 15770/ 21553 | consumed samples: 4037120 | consumed tokens: 8268021760 | elapsed time per iteration (s): 0.30 | learning rate: 5.070E-05 | global batch size: 256 | lm loss: 3.063139E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.041 | TFLOPs: 30.35 | +7: iteration 15780/ 21553 | consumed samples: 4039680 | consumed tokens: 8273264640 | elapsed time per iteration (s): 0.30 | learning rate: 5.060E-05 | global batch size: 256 | lm loss: 3.054800E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.817 | TFLOPs: 30.31 | +7: iteration 15790/ 21553 | consumed samples: 4042240 | consumed tokens: 8278507520 | elapsed time per iteration (s): 0.30 | learning rate: 5.050E-05 | global batch size: 256 | lm loss: 3.074664E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.292 | TFLOPs: 30.33 | +7: iteration 15800/ 21553 | consumed samples: 4044800 | consumed tokens: 8283750400 | elapsed time per iteration (s): 0.30 | learning rate: 5.040E-05 | global batch size: 256 | lm loss: 3.070547E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.478 | 
TFLOPs: 30.33 | +7: iteration 15810/ 21553 | consumed samples: 4047360 | consumed tokens: 8288993280 | elapsed time per iteration (s): 0.30 | learning rate: 5.030E-05 | global batch size: 256 | lm loss: 3.065357E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.041 | TFLOPs: 30.32 | +7: iteration 15820/ 21553 | consumed samples: 4049920 | consumed tokens: 8294236160 | elapsed time per iteration (s): 0.30 | learning rate: 5.020E-05 | global batch size: 256 | lm loss: 3.055312E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.973 | TFLOPs: 30.32 | +7: iteration 15830/ 21553 | consumed samples: 4052480 | consumed tokens: 8299479040 | elapsed time per iteration (s): 0.30 | learning rate: 5.010E-05 | global batch size: 256 | lm loss: 3.059711E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.737 | TFLOPs: 30.34 | +7: iteration 15840/ 21553 | consumed samples: 4055040 | consumed tokens: 8304721920 | elapsed time per iteration (s): 0.30 | learning rate: 5.001E-05 | global batch size: 256 | lm loss: 3.056474E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.423 | TFLOPs: 30.33 | +7: iteration 15850/ 21553 | consumed samples: 4057600 | consumed tokens: 8309964800 | elapsed time per iteration (s): 0.30 | learning rate: 4.991E-05 | global batch size: 256 | lm loss: 3.072581E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.896 | TFLOPs: 30.35 | +7: iteration 15860/ 21553 | consumed samples: 4060160 | consumed tokens: 8315207680 | elapsed time per iteration (s): 0.30 | learning rate: 4.981E-05 | global batch size: 256 | lm loss: 3.049126E+00 | grad norm: 0.453 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.179 | TFLOPs: 30.32 | +7: iteration 15870/ 21553 | consumed samples: 4062720 | consumed tokens: 8320450560 | elapsed time per iteration (s): 0.30 | learning rate: 4.971E-05 | global batch size: 256 | lm loss: 3.042570E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.422 | TFLOPs: 30.33 | +7: iteration 15880/ 21553 | consumed samples: 4065280 | consumed tokens: 8325693440 | elapsed time per iteration (s): 0.30 | learning rate: 4.961E-05 | global batch size: 256 | lm loss: 3.061554E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.058 | TFLOPs: 30.32 | +7: iteration 15890/ 21553 | consumed samples: 4067840 | consumed tokens: 8330936320 | elapsed time per iteration (s): 0.30 | learning rate: 4.951E-05 | global batch size: 256 | lm loss: 3.059400E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.816 | TFLOPs: 30.31 | +7: iteration 15900/ 21553 | consumed samples: 4070400 | consumed tokens: 8336179200 | elapsed time per iteration (s): 0.30 | learning rate: 4.942E-05 | global batch size: 256 | lm loss: 3.061091E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.649 | TFLOPs: 30.34 | +7: iteration 15910/ 21553 | consumed samples: 4072960 | consumed tokens: 8341422080 | elapsed time per iteration (s): 0.30 | learning rate: 4.932E-05 | global batch size: 256 | lm loss: 3.057954E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.913 | TFLOPs: 30.31 | +7: iteration 15920/ 21553 | consumed samples: 4075520 | consumed tokens: 8346664960 | elapsed time per iteration (s): 0.30 | learning rate: 
4.922E-05 | global batch size: 256 | lm loss: 3.076538E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.687 | TFLOPs: 30.31 | +7: iteration 15930/ 21553 | consumed samples: 4078080 | consumed tokens: 8351907840 | elapsed time per iteration (s): 0.30 | learning rate: 4.912E-05 | global batch size: 256 | lm loss: 3.064940E+00 | grad norm: 0.469 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.033 | TFLOPs: 30.32 | +7: iteration 15940/ 21553 | consumed samples: 4080640 | consumed tokens: 8357150720 | elapsed time per iteration (s): 0.30 | learning rate: 4.902E-05 | global batch size: 256 | lm loss: 3.053026E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.373 | TFLOPs: 30.29 | +7: iteration 15950/ 21553 | consumed samples: 4083200 | consumed tokens: 8362393600 | elapsed time per iteration (s): 0.30 | learning rate: 4.893E-05 | global batch size: 256 | lm loss: 3.062898E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.532 | TFLOPs: 30.30 | +7: iteration 15960/ 21553 | consumed samples: 4085760 | consumed tokens: 8367636480 | elapsed time per iteration (s): 0.30 | learning rate: 4.883E-05 | global batch size: 256 | lm loss: 3.055351E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.924 | TFLOPs: 30.31 | +7: iteration 15970/ 21553 | consumed samples: 4088320 | consumed tokens: 8372879360 | elapsed time per iteration (s): 0.30 | learning rate: 4.873E-05 | global batch size: 256 | lm loss: 3.052599E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.026 | TFLOPs: 30.32 | +7: iteration 15980/ 21553 | consumed samples: 
4090880 | consumed tokens: 8378122240 | elapsed time per iteration (s): 0.30 | learning rate: 4.864E-05 | global batch size: 256 | lm loss: 3.059288E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.582 | TFLOPs: 30.30 | +7: iteration 15990/ 21553 | consumed samples: 4093440 | consumed tokens: 8383365120 | elapsed time per iteration (s): 0.30 | learning rate: 4.854E-05 | global batch size: 256 | lm loss: 3.042703E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.307 | TFLOPs: 30.29 | +0: [2023-03-14 00:37:56,182] [INFO] [logging.py:68:log_dist] [Rank 0] step=16000, skipped=0, lr=[4.8441849544340955e-05, 4.8441849544340955e-05, 4.8441849544340955e-05], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 16000/ 21553 | consumed samples: 4096000 | consumed tokens: 8388608000 | elapsed time per iteration (s): 0.30 | learning rate: 4.844E-05 | global batch size: 256 | lm loss: 3.047837E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.940 | TFLOPs: 30.31 | +0: steps: 16000 loss: 3.0376 iter time (s): 0.295 samples/sec: 866.842 +7: ------------------------------------------------------------------------------------------------ +7: validation loss at iteration 16000 | lm loss value: 3.817574E+00 | lm loss PPL: 4.549371E+01 | +7: ------------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 16000 to checkpoints_146m14b100m +0: [2023-03-14 00:37:56,302] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step16000 is begin to save! +0: [2023-03-14 00:37:56,305] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_01-model_00-model_states.pt... 
+0: [2023-03-14 00:37:56,392] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_01-model_00-model_states.pt. +0: [2023-03-14 00:37:56,392] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_03-model_00-model_states.pt... +0: [2023-03-14 00:37:56,408] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_03-model_00-model_states.pt. +0: [2023-03-14 00:37:56,408] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_04-model_00-model_states.pt... +0: [2023-03-14 00:37:56,423] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_04-model_00-model_states.pt. +0: [2023-03-14 00:37:56,423] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_05-model_00-model_states.pt... +0: [2023-03-14 00:37:56,438] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_05-model_00-model_states.pt. +0: [2023-03-14 00:37:56,438] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_06-model_00-model_states.pt... +0: [2023-03-14 00:37:56,453] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_06-model_00-model_states.pt. +0: [2023-03-14 00:37:56,453] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_07-model_00-model_states.pt... +0: [2023-03-14 00:37:56,468] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_07-model_00-model_states.pt. +0: [2023-03-14 00:37:56,468] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_08-model_00-model_states.pt... 
+0: [2023-03-14 00:37:56,483] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_08-model_00-model_states.pt. +0: [2023-03-14 00:37:56,484] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_09-model_00-model_states.pt... +0: [2023-03-14 00:37:56,498] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_09-model_00-model_states.pt. +0: [2023-03-14 00:37:56,499] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_10-model_00-model_states.pt... +0: [2023-03-14 00:37:56,513] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_10-model_00-model_states.pt. +0: [2023-03-14 00:37:56,514] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_11-model_00-model_states.pt... +0: [2023-03-14 00:37:56,529] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_11-model_00-model_states.pt. +0: [2023-03-14 00:37:56,529] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_12-model_00-model_states.pt... +0: [2023-03-14 00:37:56,544] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_12-model_00-model_states.pt. +0: [2023-03-14 00:37:56,544] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_13-model_00-model_states.pt... +0: [2023-03-14 00:37:56,559] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_13-model_00-model_states.pt. +0: [2023-03-14 00:37:56,559] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_14-model_00-model_states.pt... 
+0: [2023-03-14 00:37:56,574] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_14-model_00-model_states.pt. +0: [2023-03-14 00:37:56,575] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_15-model_00-model_states.pt... +0: [2023-03-14 00:37:56,591] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_15-model_00-model_states.pt. +0: [2023-03-14 00:37:56,591] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_16-model_00-model_states.pt... +0: [2023-03-14 00:37:56,606] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_16-model_00-model_states.pt. +0: [2023-03-14 00:37:56,606] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_17-model_00-model_states.pt... +0: [2023-03-14 00:37:56,621] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_17-model_00-model_states.pt. +0: [2023-03-14 00:37:56,621] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/layer_19-model_00-model_states.pt... +0: [2023-03-14 00:37:56,622] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/layer_19-model_00-model_states.pt. +0: [2023-03-14 00:37:56,623] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step16000/mp_rank_00_model_states.pt +0: [2023-03-14 00:37:56,623] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/mp_rank_00_model_states.pt... +0: [2023-03-14 00:37:56,625] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/mp_rank_00_model_states.pt. 
+0: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... 
+7: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... 
+4: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... 
+7: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:37:56,642] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:37:56,643] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:37:56,673] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 00:37:56,673] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-14 00:37:56,673] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2023-03-14 00:37:56,675] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:37:56,675] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 00:37:56,675] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2023-03-14 00:37:56,676] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:37:56,676] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-14 00:37:56,676] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2023-03-14 00:37:56,676] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:37:56,677] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2023-03-14 00:37:56,677] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2023-03-14 00:37:56,677] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:37:56,677] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-14 00:37:56,677] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2023-03-14 00:37:56,677] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:37:56,678] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-14 00:37:56,678] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2023-03-14 00:37:56,678] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:37:56,678] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 00:37:56,678] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2023-03-14 00:37:56,678] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:37:56,678] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2023-03-14 00:37:56,678] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2023-03-14 00:37:56,678] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:37:56,678] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +4: [2023-03-14 00:37:56,678] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:37:56,678] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2023-03-14 00:37:56,678] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-14 00:37:56,678] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2023-03-14 00:37:56,679] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:37:56,679] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-14 00:37:56,679] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2023-03-14 00:37:56,679] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:37:56,679] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-14 00:37:56,679] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2023-03-14 00:37:56,679] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 00:37:56,679] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2023-03-14 00:37:56,679] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:37:56,680] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:37:56,680] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +5: [2023-03-14 00:37:56,680] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:37:56,680] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:37:56,680] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +4: [2023-03-14 00:37:56,680] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +5: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:37:56,680] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:37:56,680] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +3: [2023-03-14 00:37:56,680] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:37:56,681] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2023-03-14 00:37:56,681] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-14 00:37:56,681] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2023-03-14 00:37:56,681] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:37:56,681] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 00:37:56,681] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2023-03-14 00:37:56,681] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:37:56,681] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:37:56,681] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 00:37:56,681] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 00:37:56,681] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2023-03-14 00:37:56,681] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2023-03-14 00:37:56,682] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:37:56,682] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-14 00:37:56,682] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2023-03-14 00:37:56,682] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:37:56,682] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-14 00:37:56,682] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2023-03-14 00:37:56,682] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:37:56,682] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:37:56,682] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +2: [2023-03-14 00:37:56,682] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +6: [2023-03-14 00:37:56,682] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2023-03-14 00:37:56,682] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2023-03-14 00:37:56,682] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:37:56,682] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-14 00:37:56,682] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2023-03-14 00:37:56,683] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:37:56,683] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 00:37:56,683] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2023-03-14 00:37:56,683] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:37:56,683] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 00:37:56,683] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2023-03-14 00:37:56,684] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:37:56,684] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:37:56,684] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:37:56,684] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 00:37:56,684] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2023-03-14 00:37:56,684] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 00:37:56,684] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 00:37:56,684] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2023-03-14 00:37:56,684] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2023-03-14 00:37:56,684] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:37:56,684] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 00:37:56,684] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2023-03-14 00:37:56,684] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:37:56,684] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 00:37:56,684] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2023-03-14 00:37:56,684] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:37:56,684] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 00:37:56,685] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2023-03-14 00:37:56,685] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:37:56,685] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2023-03-14 00:37:56,685] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2023-03-14 00:37:56,685] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:37:56,685] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:37:56,685] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-14 00:37:56,685] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-14 00:37:56,685] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +1: [2023-03-14 00:37:56,685] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2023-03-14 00:37:56,686] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:37:56,686] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:37:56,686] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-14 00:37:56,686] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +7: [2023-03-14 00:37:56,686] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:37:56,686] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:37:56,686] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-14 00:37:56,686] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +5: [2023-03-14 00:37:56,686] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2023-03-14 00:37:56,686] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2023-03-14 00:37:56,686] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +7: [2023-03-14 00:37:56,686] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +5: [2023-03-14 00:37:56,687] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:37:56,687] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 00:37:56,687] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2023-03-14 00:37:56,687] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:37:56,687] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:37:56,687] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-14 00:37:56,687] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2023-03-14 00:37:56,687] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +2: [2023-03-14 00:37:56,687] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:37:56,687] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2023-03-14 00:37:56,687] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +4: [2023-03-14 00:37:56,688] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:37:56,687] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2023-03-14 00:37:56,688] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +2: [2023-03-14 00:37:56,688] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:37:56,688] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! 
+2: [2023-03-14 00:37:56,688] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-14 00:37:56,688] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2023-03-14 00:37:56,689] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:37:56,689] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:37:56,689] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 00:37:56,689] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-14 00:37:56,689] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2023-03-14 00:37:56,689] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2023-03-14 00:37:56,689] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:37:56,690] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 00:37:56,690] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2023-03-14 00:37:56,690] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:37:56,690] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:37:56,690] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 00:37:56,690] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 00:37:56,690] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +6: [2023-03-14 00:37:56,690] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +3: [2023-03-14 00:37:56,690] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:37:56,691] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-14 00:37:56,691] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2023-03-14 00:37:56,694] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:37:56,694] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:37:56,695] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-14 00:37:56,695] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-14 00:37:56,695] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +4: [2023-03-14 00:37:56,695] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2023-03-14 00:37:56,703] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:37:56,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:37:56,704] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:37:56,704] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 00:37:56,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +0: [2023-03-14 00:37:56,704] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 00:37:56,704] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! 
+0: [2023-03-14 00:37:56,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 00:37:56,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2023-03-14 00:37:56,706] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:37:56,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2023-03-14 00:37:56,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! +2: [2023-03-14 00:37:56,709] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:37:56,709] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step16000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2023-03-14 00:37:56,709] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step16000 is ready now! 
+0: successfully saved checkpoint at iteration 16000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 412.20 +7: iteration 16010/ 21553 | consumed samples: 4098560 | consumed tokens: 8393850880 | elapsed time per iteration (s): 0.35 | learning rate: 4.835E-05 | global batch size: 256 | lm loss: 3.056797E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 729.087 | TFLOPs: 25.52 | +7: iteration 16020/ 21553 | consumed samples: 4101120 | consumed tokens: 8399093760 | elapsed time per iteration (s): 0.30 | learning rate: 4.825E-05 | global batch size: 256 | lm loss: 3.058592E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.437 | TFLOPs: 30.33 | +7: iteration 16030/ 21553 | consumed samples: 4103680 | consumed tokens: 8404336640 | elapsed time per iteration (s): 0.30 | learning rate: 4.815E-05 | global batch size: 256 | lm loss: 3.049746E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.437 | TFLOPs: 30.33 | +7: iteration 16040/ 21553 | consumed samples: 4106240 | consumed tokens: 8409579520 | elapsed time per iteration (s): 0.30 | learning rate: 4.806E-05 | global batch size: 256 | lm loss: 3.053988E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.974 | TFLOPs: 30.32 | +7: iteration 16050/ 21553 | consumed samples: 4108800 | consumed tokens: 8414822400 | elapsed time per iteration (s): 0.30 | learning rate: 4.796E-05 | global batch size: 256 | lm loss: 3.055300E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.688 | TFLOPs: 30.34 | +7: iteration 16060/ 21553 | consumed samples: 4111360 | consumed tokens: 8420065280 | elapsed time per iteration (s): 0.30 | learning 
rate: 4.786E-05 | global batch size: 256 | lm loss: 3.043985E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.792 | TFLOPs: 30.34 | +7: iteration 16070/ 21553 | consumed samples: 4113920 | consumed tokens: 8425308160 | elapsed time per iteration (s): 0.30 | learning rate: 4.777E-05 | global batch size: 256 | lm loss: 3.062595E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.233 | TFLOPs: 30.32 | +7: iteration 16080/ 21553 | consumed samples: 4116480 | consumed tokens: 8430551040 | elapsed time per iteration (s): 0.30 | learning rate: 4.767E-05 | global batch size: 256 | lm loss: 3.054486E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.199 | TFLOPs: 30.32 | +7: iteration 16090/ 21553 | consumed samples: 4119040 | consumed tokens: 8435793920 | elapsed time per iteration (s): 0.30 | learning rate: 4.758E-05 | global batch size: 256 | lm loss: 3.067716E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.147 | TFLOPs: 30.04 | +7: iteration 16100/ 21553 | consumed samples: 4121600 | consumed tokens: 8441036800 | elapsed time per iteration (s): 0.30 | learning rate: 4.748E-05 | global batch size: 256 | lm loss: 3.071804E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.430 | TFLOPs: 30.33 | +7: iteration 16110/ 21553 | consumed samples: 4124160 | consumed tokens: 8446279680 | elapsed time per iteration (s): 0.30 | learning rate: 4.739E-05 | global batch size: 256 | lm loss: 3.062530E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.773 | TFLOPs: 30.27 | +7: iteration 16120/ 21553 | consumed samples: 
4126720 | consumed tokens: 8451522560 | elapsed time per iteration (s): 0.30 | learning rate: 4.729E-05 | global batch size: 256 | lm loss: 3.063568E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.828 | TFLOPs: 30.07 | +7: iteration 16130/ 21553 | consumed samples: 4129280 | consumed tokens: 8456765440 | elapsed time per iteration (s): 0.30 | learning rate: 4.720E-05 | global batch size: 256 | lm loss: 3.048180E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.335 | TFLOPs: 29.87 | +7: iteration 16140/ 21553 | consumed samples: 4131840 | consumed tokens: 8462008320 | elapsed time per iteration (s): 0.30 | learning rate: 4.710E-05 | global batch size: 256 | lm loss: 3.041518E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.244 | TFLOPs: 30.32 | +7: iteration 16150/ 21553 | consumed samples: 4134400 | consumed tokens: 8467251200 | elapsed time per iteration (s): 0.30 | learning rate: 4.701E-05 | global batch size: 256 | lm loss: 3.051029E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.003 | TFLOPs: 30.32 | +7: iteration 16160/ 21553 | consumed samples: 4136960 | consumed tokens: 8472494080 | elapsed time per iteration (s): 0.30 | learning rate: 4.691E-05 | global batch size: 256 | lm loss: 3.048004E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.688 | TFLOPs: 30.27 | +7: iteration 16170/ 21553 | consumed samples: 4139520 | consumed tokens: 8477736960 | elapsed time per iteration (s): 0.30 | learning rate: 4.682E-05 | global batch size: 256 | lm loss: 3.053582E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | 
samples per second: 865.506 | TFLOPs: 30.30 | +7: iteration 16180/ 21553 | consumed samples: 4142080 | consumed tokens: 8482979840 | elapsed time per iteration (s): 0.30 | learning rate: 4.672E-05 | global batch size: 256 | lm loss: 3.059403E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.807 | TFLOPs: 30.31 | +7: iteration 16190/ 21553 | consumed samples: 4144640 | consumed tokens: 8488222720 | elapsed time per iteration (s): 0.30 | learning rate: 4.663E-05 | global batch size: 256 | lm loss: 3.053111E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.550 | TFLOPs: 30.30 | +7: iteration 16200/ 21553 | consumed samples: 4147200 | consumed tokens: 8493465600 | elapsed time per iteration (s): 0.30 | learning rate: 4.654E-05 | global batch size: 256 | lm loss: 3.061369E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.983 | TFLOPs: 30.32 | +7: iteration 16210/ 21553 | consumed samples: 4149760 | consumed tokens: 8498708480 | elapsed time per iteration (s): 0.30 | learning rate: 4.644E-05 | global batch size: 256 | lm loss: 3.056695E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.717 | TFLOPs: 30.31 | +7: iteration 16220/ 21553 | consumed samples: 4152320 | consumed tokens: 8503951360 | elapsed time per iteration (s): 0.30 | learning rate: 4.635E-05 | global batch size: 256 | lm loss: 3.053715E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.463 | TFLOPs: 30.30 | +7: iteration 16230/ 21553 | consumed samples: 4154880 | consumed tokens: 8509194240 | elapsed time per iteration (s): 0.30 | learning rate: 4.625E-05 | global batch size: 256 | lm loss: 3.045357E+00 | grad norm: 
0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.904 | TFLOPs: 30.10 | +7: iteration 16240/ 21553 | consumed samples: 4157440 | consumed tokens: 8514437120 | elapsed time per iteration (s): 0.30 | learning rate: 4.616E-05 | global batch size: 256 | lm loss: 3.050559E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.817 | TFLOPs: 30.17 | +7: iteration 16250/ 21553 | consumed samples: 4160000 | consumed tokens: 8519680000 | elapsed time per iteration (s): 0.30 | learning rate: 4.607E-05 | global batch size: 256 | lm loss: 3.045726E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.887 | TFLOPs: 30.21 | +7: iteration 16260/ 21553 | consumed samples: 4162560 | consumed tokens: 8524922880 | elapsed time per iteration (s): 0.30 | learning rate: 4.597E-05 | global batch size: 256 | lm loss: 3.049392E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.315 | TFLOPs: 30.22 | +7: iteration 16270/ 21553 | consumed samples: 4165120 | consumed tokens: 8530165760 | elapsed time per iteration (s): 0.30 | learning rate: 4.588E-05 | global batch size: 256 | lm loss: 3.053038E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.393 | TFLOPs: 30.26 | +7: iteration 16280/ 21553 | consumed samples: 4167680 | consumed tokens: 8535408640 | elapsed time per iteration (s): 0.30 | learning rate: 4.579E-05 | global batch size: 256 | lm loss: 3.047196E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.318 | TFLOPs: 30.15 | +7: iteration 16290/ 21553 | consumed samples: 4170240 | consumed tokens: 8540651520 | elapsed time per iteration (s): 0.30 
| learning rate: 4.570E-05 | global batch size: 256 | lm loss: 3.059826E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.724 | TFLOPs: 30.31 | +7: iteration 16300/ 21553 | consumed samples: 4172800 | consumed tokens: 8545894400 | elapsed time per iteration (s): 0.30 | learning rate: 4.560E-05 | global batch size: 256 | lm loss: 3.061288E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.558 | TFLOPs: 30.34 | +7: iteration 16310/ 21553 | consumed samples: 4175360 | consumed tokens: 8551137280 | elapsed time per iteration (s): 0.30 | learning rate: 4.551E-05 | global batch size: 256 | lm loss: 3.028327E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.533 | TFLOPs: 30.33 | +7: iteration 16320/ 21553 | consumed samples: 4177920 | consumed tokens: 8556380160 | elapsed time per iteration (s): 0.30 | learning rate: 4.542E-05 | global batch size: 256 | lm loss: 3.051179E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.342 | TFLOPs: 30.33 | +7: iteration 16330/ 21553 | consumed samples: 4180480 | consumed tokens: 8561623040 | elapsed time per iteration (s): 0.30 | learning rate: 4.533E-05 | global batch size: 256 | lm loss: 3.049006E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.141 | TFLOPs: 30.32 | +7: iteration 16340/ 21553 | consumed samples: 4183040 | consumed tokens: 8566865920 | elapsed time per iteration (s): 0.30 | learning rate: 4.523E-05 | global batch size: 256 | lm loss: 3.053912E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.588 | TFLOPs: 30.34 | +7: iteration 16350/ 21553 | 
consumed samples: 4185600 | consumed tokens: 8572108800 | elapsed time per iteration (s): 0.30 | learning rate: 4.514E-05 | global batch size: 256 | lm loss: 3.045194E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.313 | TFLOPs: 30.36 | +7: iteration 16360/ 21553 | consumed samples: 4188160 | consumed tokens: 8577351680 | elapsed time per iteration (s): 0.30 | learning rate: 4.505E-05 | global batch size: 256 | lm loss: 3.042990E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.514 | TFLOPs: 30.33 | +7: iteration 16370/ 21553 | consumed samples: 4190720 | consumed tokens: 8582594560 | elapsed time per iteration (s): 0.30 | learning rate: 4.496E-05 | global batch size: 256 | lm loss: 3.071623E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.862 | TFLOPs: 30.31 | +7: iteration 16380/ 21553 | consumed samples: 4193280 | consumed tokens: 8587837440 | elapsed time per iteration (s): 0.30 | learning rate: 4.487E-05 | global batch size: 256 | lm loss: 3.046524E+00 | grad norm: 0.458 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.815 | TFLOPs: 30.31 | +7: iteration 16390/ 21553 | consumed samples: 4195840 | consumed tokens: 8593080320 | elapsed time per iteration (s): 0.30 | learning rate: 4.478E-05 | global batch size: 256 | lm loss: 3.050196E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.326 | TFLOPs: 30.29 | +7: iteration 16400/ 21553 | consumed samples: 4198400 | consumed tokens: 8598323200 | elapsed time per iteration (s): 0.30 | learning rate: 4.468E-05 | global batch size: 256 | lm loss: 3.050882E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 865.895 | TFLOPs: 30.31 | +7: iteration 16410/ 21553 | consumed samples: 4200960 | consumed tokens: 8603566080 | elapsed time per iteration (s): 0.30 | learning rate: 4.459E-05 | global batch size: 256 | lm loss: 3.044919E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.968 | TFLOPs: 30.14 | +7: iteration 16420/ 21553 | consumed samples: 4203520 | consumed tokens: 8608808960 | elapsed time per iteration (s): 0.30 | learning rate: 4.450E-05 | global batch size: 256 | lm loss: 3.059353E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.018 | TFLOPs: 29.93 | +7: iteration 16430/ 21553 | consumed samples: 4206080 | consumed tokens: 8614051840 | elapsed time per iteration (s): 0.30 | learning rate: 4.441E-05 | global batch size: 256 | lm loss: 3.047205E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.011 | TFLOPs: 30.21 | +7: iteration 16440/ 21553 | consumed samples: 4208640 | consumed tokens: 8619294720 | elapsed time per iteration (s): 0.30 | learning rate: 4.432E-05 | global batch size: 256 | lm loss: 3.053039E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.289 | TFLOPs: 30.33 | +7: iteration 16450/ 21553 | consumed samples: 4211200 | consumed tokens: 8624537600 | elapsed time per iteration (s): 0.30 | learning rate: 4.423E-05 | global batch size: 256 | lm loss: 3.052656E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.361 | TFLOPs: 30.33 | +7: iteration 16460/ 21553 | consumed samples: 4213760 | consumed tokens: 8629780480 | elapsed time per iteration (s): 0.30 | learning rate: 4.414E-05 | global batch size: 256 | lm loss: 
3.047952E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.408 | TFLOPs: 30.33 | +7: iteration 16470/ 21553 | consumed samples: 4216320 | consumed tokens: 8635023360 | elapsed time per iteration (s): 0.30 | learning rate: 4.405E-05 | global batch size: 256 | lm loss: 3.040421E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.115 | TFLOPs: 30.29 | +7: iteration 16480/ 21553 | consumed samples: 4218880 | consumed tokens: 8640266240 | elapsed time per iteration (s): 0.30 | learning rate: 4.396E-05 | global batch size: 256 | lm loss: 3.039542E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.930 | TFLOPs: 30.31 | +7: iteration 16490/ 21553 | consumed samples: 4221440 | consumed tokens: 8645509120 | elapsed time per iteration (s): 0.30 | learning rate: 4.387E-05 | global batch size: 256 | lm loss: 3.050986E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.388 | TFLOPs: 30.05 | +7: iteration 16500/ 21553 | consumed samples: 4224000 | consumed tokens: 8650752000 | elapsed time per iteration (s): 0.30 | learning rate: 4.378E-05 | global batch size: 256 | lm loss: 3.060339E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.932 | TFLOPs: 30.03 | +7: iteration 16510/ 21553 | consumed samples: 4226560 | consumed tokens: 8655994880 | elapsed time per iteration (s): 0.30 | learning rate: 4.369E-05 | global batch size: 256 | lm loss: 3.041810E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.397 | TFLOPs: 30.30 | +7: iteration 16520/ 21553 | consumed samples: 4229120 | consumed tokens: 8661237760 | elapsed 
time per iteration (s): 0.30 | learning rate: 4.360E-05 | global batch size: 256 | lm loss: 3.051009E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.140 | TFLOPs: 30.01 | +7: iteration 16530/ 21553 | consumed samples: 4231680 | consumed tokens: 8666480640 | elapsed time per iteration (s): 0.30 | learning rate: 4.351E-05 | global batch size: 256 | lm loss: 3.050359E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.264 | TFLOPs: 30.33 | +7: iteration 16540/ 21553 | consumed samples: 4234240 | consumed tokens: 8671723520 | elapsed time per iteration (s): 0.30 | learning rate: 4.342E-05 | global batch size: 256 | lm loss: 3.051466E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.344 | TFLOPs: 30.33 | +7: iteration 16550/ 21553 | consumed samples: 4236800 | consumed tokens: 8676966400 | elapsed time per iteration (s): 0.30 | learning rate: 4.333E-05 | global batch size: 256 | lm loss: 3.050291E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.447 | TFLOPs: 30.33 | +7: iteration 16560/ 21553 | consumed samples: 4239360 | consumed tokens: 8682209280 | elapsed time per iteration (s): 0.30 | learning rate: 4.324E-05 | global batch size: 256 | lm loss: 3.054716E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.752 | TFLOPs: 30.31 | +7: iteration 16570/ 21553 | consumed samples: 4241920 | consumed tokens: 8687452160 | elapsed time per iteration (s): 0.30 | learning rate: 4.315E-05 | global batch size: 256 | lm loss: 3.054036E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.166 | TFLOPs: 30.29 | +7: 
iteration 16580/ 21553 | consumed samples: 4244480 | consumed tokens: 8692695040 | elapsed time per iteration (s): 0.30 | learning rate: 4.307E-05 | global batch size: 256 | lm loss: 3.038944E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.957 | TFLOPs: 30.31 | +7: iteration 16590/ 21553 | consumed samples: 4247040 | consumed tokens: 8697937920 | elapsed time per iteration (s): 0.30 | learning rate: 4.298E-05 | global batch size: 256 | lm loss: 3.060782E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.882 | TFLOPs: 29.93 | +7: iteration 16600/ 21553 | consumed samples: 4249600 | consumed tokens: 8703180800 | elapsed time per iteration (s): 0.30 | learning rate: 4.289E-05 | global batch size: 256 | lm loss: 3.058711E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.896 | TFLOPs: 30.31 | +7: iteration 16610/ 21553 | consumed samples: 4252160 | consumed tokens: 8708423680 | elapsed time per iteration (s): 0.30 | learning rate: 4.280E-05 | global batch size: 256 | lm loss: 3.049573E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.771 | TFLOPs: 30.31 | +7: iteration 16620/ 21553 | consumed samples: 4254720 | consumed tokens: 8713666560 | elapsed time per iteration (s): 0.30 | learning rate: 4.271E-05 | global batch size: 256 | lm loss: 3.055663E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.591 | TFLOPs: 30.30 | +7: iteration 16630/ 21553 | consumed samples: 4257280 | consumed tokens: 8718909440 | elapsed time per iteration (s): 0.30 | learning rate: 4.263E-05 | global batch size: 256 | lm loss: 3.051058E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 865.544 | TFLOPs: 30.30 | +7: iteration 16640/ 21553 | consumed samples: 4259840 | consumed tokens: 8724152320 | elapsed time per iteration (s): 0.30 | learning rate: 4.254E-05 | global batch size: 256 | lm loss: 3.046347E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.564 | TFLOPs: 29.92 | +7: iteration 16650/ 21553 | consumed samples: 4262400 | consumed tokens: 8729395200 | elapsed time per iteration (s): 0.30 | learning rate: 4.245E-05 | global batch size: 256 | lm loss: 3.036446E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.962 | TFLOPs: 30.31 | +7: iteration 16660/ 21553 | consumed samples: 4264960 | consumed tokens: 8734638080 | elapsed time per iteration (s): 0.30 | learning rate: 4.236E-05 | global batch size: 256 | lm loss: 3.058146E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.790 | TFLOPs: 30.31 | +7: iteration 16670/ 21553 | consumed samples: 4267520 | consumed tokens: 8739880960 | elapsed time per iteration (s): 0.30 | learning rate: 4.227E-05 | global batch size: 256 | lm loss: 3.047544E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.857 | TFLOPs: 30.31 | +7: iteration 16680/ 21553 | consumed samples: 4270080 | consumed tokens: 8745123840 | elapsed time per iteration (s): 0.30 | learning rate: 4.219E-05 | global batch size: 256 | lm loss: 3.052526E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.625 | TFLOPs: 30.30 | +7: iteration 16690/ 21553 | consumed samples: 4272640 | consumed tokens: 8750366720 | elapsed time per iteration (s): 0.30 | learning rate: 4.210E-05 | global batch 
size: 256 | lm loss: 3.031953E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.670 | TFLOPs: 30.30 | +7: iteration 16700/ 21553 | consumed samples: 4275200 | consumed tokens: 8755609600 | elapsed time per iteration (s): 0.30 | learning rate: 4.201E-05 | global batch size: 256 | lm loss: 3.050744E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.144 | TFLOPs: 30.32 | +7: iteration 16710/ 21553 | consumed samples: 4277760 | consumed tokens: 8760852480 | elapsed time per iteration (s): 0.30 | learning rate: 4.193E-05 | global batch size: 256 | lm loss: 3.048672E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.282 | TFLOPs: 30.36 | +7: iteration 16720/ 21553 | consumed samples: 4280320 | consumed tokens: 8766095360 | elapsed time per iteration (s): 0.30 | learning rate: 4.184E-05 | global batch size: 256 | lm loss: 3.049073E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.240 | TFLOPs: 29.94 | +7: iteration 16730/ 21553 | consumed samples: 4282880 | consumed tokens: 8771338240 | elapsed time per iteration (s): 0.30 | learning rate: 4.175E-05 | global batch size: 256 | lm loss: 3.041560E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.958 | TFLOPs: 29.89 | +7: iteration 16740/ 21553 | consumed samples: 4285440 | consumed tokens: 8776581120 | elapsed time per iteration (s): 0.30 | learning rate: 4.167E-05 | global batch size: 256 | lm loss: 3.044128E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.953 | TFLOPs: 30.31 | +7: iteration 16750/ 21553 | consumed samples: 4288000 | consumed tokens: 
8781824000 | elapsed time per iteration (s): 0.30 | learning rate: 4.158E-05 | global batch size: 256 | lm loss: 3.045182E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.150 | TFLOPs: 29.87 | +7: iteration 16760/ 21553 | consumed samples: 4290560 | consumed tokens: 8787066880 | elapsed time per iteration (s): 0.30 | learning rate: 4.150E-05 | global batch size: 256 | lm loss: 3.056324E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 853.357 | TFLOPs: 29.87 | +7: iteration 16770/ 21553 | consumed samples: 4293120 | consumed tokens: 8792309760 | elapsed time per iteration (s): 0.30 | learning rate: 4.141E-05 | global batch size: 256 | lm loss: 3.041434E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.331 | TFLOPs: 30.29 | +7: iteration 16780/ 21553 | consumed samples: 4295680 | consumed tokens: 8797552640 | elapsed time per iteration (s): 0.30 | learning rate: 4.132E-05 | global batch size: 256 | lm loss: 3.051227E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.716 | TFLOPs: 30.34 | +7: iteration 16790/ 21553 | consumed samples: 4298240 | consumed tokens: 8802795520 | elapsed time per iteration (s): 0.30 | learning rate: 4.124E-05 | global batch size: 256 | lm loss: 3.040777E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.399 | TFLOPs: 30.33 | +7: iteration 16800/ 21553 | consumed samples: 4300800 | consumed tokens: 8808038400 | elapsed time per iteration (s): 0.30 | learning rate: 4.115E-05 | global batch size: 256 | lm loss: 3.057640E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.052 | 
TFLOPs: 30.32 | +7: iteration 16810/ 21553 | consumed samples: 4303360 | consumed tokens: 8813281280 | elapsed time per iteration (s): 0.30 | learning rate: 4.107E-05 | global batch size: 256 | lm loss: 3.059677E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.088 | TFLOPs: 30.32 | +7: iteration 16820/ 21553 | consumed samples: 4305920 | consumed tokens: 8818524160 | elapsed time per iteration (s): 0.30 | learning rate: 4.098E-05 | global batch size: 256 | lm loss: 3.053879E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.594 | TFLOPs: 30.30 | +7: iteration 16830/ 21553 | consumed samples: 4308480 | consumed tokens: 8823767040 | elapsed time per iteration (s): 0.30 | learning rate: 4.090E-05 | global batch size: 256 | lm loss: 3.045222E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.033 | TFLOPs: 30.32 | +7: iteration 16840/ 21553 | consumed samples: 4311040 | consumed tokens: 8829009920 | elapsed time per iteration (s): 0.30 | learning rate: 4.081E-05 | global batch size: 256 | lm loss: 3.036823E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.898 | TFLOPs: 30.31 | +7: iteration 16850/ 21553 | consumed samples: 4313600 | consumed tokens: 8834252800 | elapsed time per iteration (s): 0.30 | learning rate: 4.073E-05 | global batch size: 256 | lm loss: 3.036201E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.342 | TFLOPs: 30.33 | +7: iteration 16860/ 21553 | consumed samples: 4316160 | consumed tokens: 8839495680 | elapsed time per iteration (s): 0.30 | learning rate: 4.064E-05 | global batch size: 256 | lm loss: 3.045547E+00 | grad norm: 0.426 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.865 | TFLOPs: 30.31 | +7: iteration 16870/ 21553 | consumed samples: 4318720 | consumed tokens: 8844738560 | elapsed time per iteration (s): 0.30 | learning rate: 4.056E-05 | global batch size: 256 | lm loss: 3.051494E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.614 | TFLOPs: 30.34 | +7: iteration 16880/ 21553 | consumed samples: 4321280 | consumed tokens: 8849981440 | elapsed time per iteration (s): 0.30 | learning rate: 4.047E-05 | global batch size: 256 | lm loss: 3.049772E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.137 | TFLOPs: 30.32 | +7: iteration 16890/ 21553 | consumed samples: 4323840 | consumed tokens: 8855224320 | elapsed time per iteration (s): 0.30 | learning rate: 4.039E-05 | global batch size: 256 | lm loss: 3.041571E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.494 | TFLOPs: 30.33 | +7: iteration 16900/ 21553 | consumed samples: 4326400 | consumed tokens: 8860467200 | elapsed time per iteration (s): 0.30 | learning rate: 4.031E-05 | global batch size: 256 | lm loss: 3.050747E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.168 | TFLOPs: 30.32 | +7: iteration 16910/ 21553 | consumed samples: 4328960 | consumed tokens: 8865710080 | elapsed time per iteration (s): 0.30 | learning rate: 4.022E-05 | global batch size: 256 | lm loss: 3.033725E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.386 | TFLOPs: 30.33 | +7: iteration 16920/ 21553 | consumed samples: 4331520 | consumed tokens: 8870952960 | elapsed time per iteration (s): 0.30 | learning rate: 
4.014E-05 | global batch size: 256 | lm loss: 3.049219E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.258 | TFLOPs: 30.33 | +7: iteration 16930/ 21553 | consumed samples: 4334080 | consumed tokens: 8876195840 | elapsed time per iteration (s): 0.30 | learning rate: 4.006E-05 | global batch size: 256 | lm loss: 3.050281E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.388 | TFLOPs: 30.33 | +7: iteration 16940/ 21553 | consumed samples: 4336640 | consumed tokens: 8881438720 | elapsed time per iteration (s): 0.30 | learning rate: 3.997E-05 | global batch size: 256 | lm loss: 3.049708E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.591 | TFLOPs: 30.34 | +7: iteration 16950/ 21553 | consumed samples: 4339200 | consumed tokens: 8886681600 | elapsed time per iteration (s): 0.30 | learning rate: 3.989E-05 | global batch size: 256 | lm loss: 3.044542E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.395 | TFLOPs: 30.33 | +7: iteration 16960/ 21553 | consumed samples: 4341760 | consumed tokens: 8891924480 | elapsed time per iteration (s): 0.30 | learning rate: 3.981E-05 | global batch size: 256 | lm loss: 3.042579E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.492 | TFLOPs: 30.33 | +7: iteration 16970/ 21553 | consumed samples: 4344320 | consumed tokens: 8897167360 | elapsed time per iteration (s): 0.30 | learning rate: 3.972E-05 | global batch size: 256 | lm loss: 3.033918E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.130 | TFLOPs: 30.32 | +7: iteration 16980/ 21553 | consumed samples: 
4346880 | consumed tokens: 8902410240 | elapsed time per iteration (s): 0.30 | learning rate: 3.964E-05 | global batch size: 256 | lm loss: 3.038127E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.827 | TFLOPs: 30.35 | +7: iteration 16990/ 21553 | consumed samples: 4349440 | consumed tokens: 8907653120 | elapsed time per iteration (s): 0.30 | learning rate: 3.956E-05 | global batch size: 256 | lm loss: 3.039519E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.174 | TFLOPs: 30.32 | +7: iteration 17000/ 21553 | consumed samples: 4352000 | consumed tokens: 8912896000 | elapsed time per iteration (s): 0.30 | learning rate: 3.948E-05 | global batch size: 256 | lm loss: 3.047639E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.937 | TFLOPs: 30.31 | +7: ------------------------------------------------------------------------------------------------ +7: validation loss at iteration 17000 | lm loss value: 3.760770E+00 | lm loss PPL: 4.298149E+01 | +7: ------------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 17000 to checkpoints_146m14b100m +0: [2023-03-14 00:42:53,002] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step17000 is begin to save! +0: [2023-03-14 00:42:53,006] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_01-model_00-model_states.pt... +0: [2023-03-14 00:42:53,092] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_01-model_00-model_states.pt. +0: [2023-03-14 00:42:53,092] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_03-model_00-model_states.pt... 
+0: [2023-03-14 00:42:53,108] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_03-model_00-model_states.pt. +0: [2023-03-14 00:42:53,108] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_04-model_00-model_states.pt... +0: [2023-03-14 00:42:53,123] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_04-model_00-model_states.pt. +0: [2023-03-14 00:42:53,123] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_05-model_00-model_states.pt... +0: [2023-03-14 00:42:53,138] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_05-model_00-model_states.pt. +0: [2023-03-14 00:42:53,138] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_06-model_00-model_states.pt... +0: [2023-03-14 00:42:53,153] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_06-model_00-model_states.pt. +0: [2023-03-14 00:42:53,153] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_07-model_00-model_states.pt... +0: [2023-03-14 00:42:53,168] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_07-model_00-model_states.pt. +0: [2023-03-14 00:42:53,168] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_08-model_00-model_states.pt... +0: [2023-03-14 00:42:53,183] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_08-model_00-model_states.pt. +0: [2023-03-14 00:42:53,183] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_09-model_00-model_states.pt... 
+0: [2023-03-14 00:42:53,198] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_09-model_00-model_states.pt. +0: [2023-03-14 00:42:53,198] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_10-model_00-model_states.pt... +0: [2023-03-14 00:42:53,213] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_10-model_00-model_states.pt. +0: [2023-03-14 00:42:53,213] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_11-model_00-model_states.pt... +0: [2023-03-14 00:42:53,228] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_11-model_00-model_states.pt. +0: [2023-03-14 00:42:53,228] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_12-model_00-model_states.pt... +0: [2023-03-14 00:42:53,243] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_12-model_00-model_states.pt. +0: [2023-03-14 00:42:53,243] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_13-model_00-model_states.pt... +0: [2023-03-14 00:42:53,258] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_13-model_00-model_states.pt. +0: [2023-03-14 00:42:53,259] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_14-model_00-model_states.pt... +0: [2023-03-14 00:42:53,274] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_14-model_00-model_states.pt. +0: [2023-03-14 00:42:53,274] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_15-model_00-model_states.pt... 
+0: [2023-03-14 00:42:53,289] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_15-model_00-model_states.pt. +0: [2023-03-14 00:42:53,289] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_16-model_00-model_states.pt... +0: [2023-03-14 00:42:53,304] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_16-model_00-model_states.pt. +0: [2023-03-14 00:42:53,304] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_17-model_00-model_states.pt... +0: [2023-03-14 00:42:53,319] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_17-model_00-model_states.pt. +0: [2023-03-14 00:42:53,319] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/layer_19-model_00-model_states.pt... +0: [2023-03-14 00:42:53,320] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/layer_19-model_00-model_states.pt. +0: [2023-03-14 00:42:53,321] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step17000/mp_rank_00_model_states.pt +0: [2023-03-14 00:42:53,321] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/mp_rank_00_model_states.pt... +0: [2023-03-14 00:42:53,323] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/mp_rank_00_model_states.pt. +0: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... 
+0: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... 
+1: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... 
+7: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... 
+1: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... 
+7: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:42:53,341] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:42:53,375] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:42:53,374] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:42:53,375] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-14 00:42:53,375] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! 
+2: [2023-03-14 00:42:53,375] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:42:53,377] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:42:53,376] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +0: [2023-03-14 00:42:53,377] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +2: [2023-03-14 00:42:53,376] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +3: [2023-03-14 00:42:53,374] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +2: [2023-03-14 00:42:53,376] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:42:53,374] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2023-03-14 00:42:53,376] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3: [2023-03-14 00:42:53,376] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:42:53,376] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! 
+3: [2023-03-14 00:42:53,376] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-14 00:42:53,376] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +3: [2023-03-14 00:42:53,376] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:42:53,376] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-14 00:42:53,377] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +3: [2023-03-14 00:42:53,377] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:42:53,377] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +0: [2023-03-14 00:42:53,377] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +3: [2023-03-14 00:42:53,377] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2023-03-14 00:42:53,377] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:42:53,377] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:42:53,377] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2023-03-14 00:42:53,378] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:42:53,378] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:42:53,378] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 00:42:53,378] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:42:53,378] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:42:53,378] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:42:53,379] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 00:42:53,379] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:42:53,379] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 00:42:53,379] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2023-03-14 00:42:53,380] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:42:53,380] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-14 00:42:53,380] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2023-03-14 00:42:53,380] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:42:53,380] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:42:53,380] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-14 00:42:53,380] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 00:42:53,380] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2023-03-14 00:42:53,380] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2023-03-14 00:42:53,381] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:42:53,381] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-14 00:42:53,381] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2023-03-14 00:42:53,382] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:42:53,382] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 00:42:53,382] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2023-03-14 00:42:53,382] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:42:53,382] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 00:42:53,382] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2023-03-14 00:42:53,382] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:42:53,382] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 00:42:53,382] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2023-03-14 00:42:53,383] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:42:53,383] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 00:42:53,383] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2023-03-14 00:42:53,383] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:42:53,383] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-14 00:42:53,383] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:42:53,384] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2023-03-14 00:42:53,384] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-14 00:42:53,384] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2023-03-14 00:42:53,384] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:42:53,384] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:42:53,384] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:42:53,384] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 00:42:53,384] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-14 00:42:53,384] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 00:42:53,384] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2023-03-14 00:42:53,384] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +7: [2023-03-14 00:42:53,384] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2023-03-14 00:42:53,384] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:42:53,384] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-14 00:42:53,384] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2023-03-14 00:42:53,384] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:42:53,385] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 00:42:53,385] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! 
+1: [2023-03-14 00:42:53,385] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:42:53,385] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-14 00:42:53,385] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2023-03-14 00:42:53,385] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:42:53,385] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 00:42:53,385] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +1: [2023-03-14 00:42:53,386] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:42:53,386] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 00:42:53,386] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:42:53,378] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! 
+5: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:42:53,378] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:42:53,379] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2023-03-14 00:42:53,387] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:42:53,387] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 00:42:53,387] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2023-03-14 00:42:53,388] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:42:53,388] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-14 00:42:53,388] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! 
+5: [2023-03-14 00:42:53,388] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:42:53,388] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-14 00:42:53,388] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2023-03-14 00:42:53,389] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:42:53,389] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:42:53,389] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-14 00:42:53,389] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 00:42:53,389] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +5: [2023-03-14 00:42:53,389] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:42:53,379] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +4: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:42:53,379] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +2: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +3: [2023-03-14 00:42:53,379] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2023-03-14 00:42:53,380] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:42:53,380] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:42:53,380] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3: [2023-03-14 00:42:53,380] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +2: [2023-03-14 00:42:53,380] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +3: [2023-03-14 00:42:53,380] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2023-03-14 00:42:53,380] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 00:42:53,383] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:42:53,380] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3: [2023-03-14 00:42:53,383] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:42:53,383] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +2: [2023-03-14 00:42:53,380] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +3: [2023-03-14 00:42:53,383] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +2: [2023-03-14 00:42:53,382] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:42:53,383] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2023-03-14 00:42:53,382] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3: [2023-03-14 00:42:53,383] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2023-03-14 00:42:53,382] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +2: [2023-03-14 00:42:53,383] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:42:53,383] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-14 00:42:53,383] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2023-03-14 00:42:53,390] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:42:53,390] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-14 00:42:53,390] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2023-03-14 00:42:53,378] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-14 00:42:53,378] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2023-03-14 00:42:53,385] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:42:53,385] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:42:53,385] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:42:53,385] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-14 00:42:53,385] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-14 00:42:53,385] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-14 00:42:53,385] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2023-03-14 00:42:53,385] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2023-03-14 00:42:53,385] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2023-03-14 00:42:53,394] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:42:53,394] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:42:53,394] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 00:42:53,394] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 00:42:53,394] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +6: [2023-03-14 00:42:53,394] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! 
+4: [2023-03-14 00:42:53,408] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:42:53,408] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-14 00:42:53,408] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +0: [2023-03-14 00:42:53,409] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 00:42:53,409] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! +4: [2023-03-14 00:42:53,413] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:42:53,413] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step17000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-14 00:42:53,413] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step17000 is ready now! 
+0: successfully saved checkpoint at iteration 17000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 416.86 +7: iteration 17010/ 21553 | consumed samples: 4354560 | consumed tokens: 8918138880 | elapsed time per iteration (s): 0.35 | learning rate: 3.939E-05 | global batch size: 256 | lm loss: 3.049222E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 739.853 | TFLOPs: 25.90 | +7: iteration 17020/ 21553 | consumed samples: 4357120 | consumed tokens: 8923381760 | elapsed time per iteration (s): 0.30 | learning rate: 3.931E-05 | global batch size: 256 | lm loss: 3.043089E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.729 | TFLOPs: 30.34 | +7: iteration 17030/ 21553 | consumed samples: 4359680 | consumed tokens: 8928624640 | elapsed time per iteration (s): 0.30 | learning rate: 3.923E-05 | global batch size: 256 | lm loss: 3.047991E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.638 | TFLOPs: 30.34 | +7: iteration 17040/ 21553 | consumed samples: 4362240 | consumed tokens: 8933867520 | elapsed time per iteration (s): 0.30 | learning rate: 3.915E-05 | global batch size: 256 | lm loss: 3.043483E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.979 | TFLOPs: 30.32 | +7: iteration 17050/ 21553 | consumed samples: 4364800 | consumed tokens: 8939110400 | elapsed time per iteration (s): 0.30 | learning rate: 3.907E-05 | global batch size: 256 | lm loss: 3.043439E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.953 | TFLOPs: 30.31 | +7: iteration 17060/ 21553 | consumed samples: 4367360 | consumed tokens: 8944353280 | elapsed time per iteration (s): 0.30 | learning 
rate: 3.898E-05 | global batch size: 256 | lm loss: 3.046071E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.946 | TFLOPs: 30.31 | +7: iteration 17070/ 21553 | consumed samples: 4369920 | consumed tokens: 8949596160 | elapsed time per iteration (s): 0.30 | learning rate: 3.890E-05 | global batch size: 256 | lm loss: 3.031161E+00 | grad norm: 0.454 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.845 | TFLOPs: 30.31 | +7: iteration 17080/ 21553 | consumed samples: 4372480 | consumed tokens: 8954839040 | elapsed time per iteration (s): 0.30 | learning rate: 3.882E-05 | global batch size: 256 | lm loss: 3.049877E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.567 | TFLOPs: 30.30 | +7: iteration 17090/ 21553 | consumed samples: 4375040 | consumed tokens: 8960081920 | elapsed time per iteration (s): 0.30 | learning rate: 3.874E-05 | global batch size: 256 | lm loss: 3.036011E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.857 | TFLOPs: 30.31 | +7: iteration 17100/ 21553 | consumed samples: 4377600 | consumed tokens: 8965324800 | elapsed time per iteration (s): 0.30 | learning rate: 3.866E-05 | global batch size: 256 | lm loss: 3.046661E+00 | grad norm: 0.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.552 | TFLOPs: 30.34 | +7: iteration 17110/ 21553 | consumed samples: 4380160 | consumed tokens: 8970567680 | elapsed time per iteration (s): 0.30 | learning rate: 3.858E-05 | global batch size: 256 | lm loss: 3.045312E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.515 | TFLOPs: 30.16 | +7: iteration 17120/ 21553 | consumed samples: 
4382720 | consumed tokens: 8975810560 | elapsed time per iteration (s): 0.30 | learning rate: 3.850E-05 | global batch size: 256 | lm loss: 3.055225E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.039 | TFLOPs: 30.32 | +7: iteration 17130/ 21553 | consumed samples: 4385280 | consumed tokens: 8981053440 | elapsed time per iteration (s): 0.30 | learning rate: 3.842E-05 | global batch size: 256 | lm loss: 3.034564E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.250 | TFLOPs: 30.01 | +7: iteration 17140/ 21553 | consumed samples: 4387840 | consumed tokens: 8986296320 | elapsed time per iteration (s): 0.30 | learning rate: 3.834E-05 | global batch size: 256 | lm loss: 3.043987E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.110 | TFLOPs: 30.29 | +7: iteration 17150/ 21553 | consumed samples: 4390400 | consumed tokens: 8991539200 | elapsed time per iteration (s): 0.30 | learning rate: 3.826E-05 | global batch size: 256 | lm loss: 3.035878E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.217 | TFLOPs: 30.29 | +7: iteration 17160/ 21553 | consumed samples: 4392960 | consumed tokens: 8996782080 | elapsed time per iteration (s): 0.30 | learning rate: 3.818E-05 | global batch size: 256 | lm loss: 3.056007E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.362 | TFLOPs: 30.29 | +7: iteration 17170/ 21553 | consumed samples: 4395520 | consumed tokens: 9002024960 | elapsed time per iteration (s): 0.30 | learning rate: 3.810E-05 | global batch size: 256 | lm loss: 3.028828E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | 
samples per second: 866.380 | TFLOPs: 30.33 | +7: iteration 17180/ 21553 | consumed samples: 4398080 | consumed tokens: 9007267840 | elapsed time per iteration (s): 0.30 | learning rate: 3.802E-05 | global batch size: 256 | lm loss: 3.034388E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.512 | TFLOPs: 30.33 | +7: iteration 17190/ 21553 | consumed samples: 4400640 | consumed tokens: 9012510720 | elapsed time per iteration (s): 0.30 | learning rate: 3.794E-05 | global batch size: 256 | lm loss: 3.036333E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.303 | TFLOPs: 30.33 | +7: iteration 17200/ 21553 | consumed samples: 4403200 | consumed tokens: 9017753600 | elapsed time per iteration (s): 0.30 | learning rate: 3.786E-05 | global batch size: 256 | lm loss: 3.051417E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.707 | TFLOPs: 30.34 | +7: iteration 17210/ 21553 | consumed samples: 4405760 | consumed tokens: 9022996480 | elapsed time per iteration (s): 0.30 | learning rate: 3.778E-05 | global batch size: 256 | lm loss: 3.042751E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.807 | TFLOPs: 30.34 | +7: iteration 17220/ 21553 | consumed samples: 4408320 | consumed tokens: 9028239360 | elapsed time per iteration (s): 0.30 | learning rate: 3.770E-05 | global batch size: 256 | lm loss: 3.044316E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.260 | TFLOPs: 30.33 | +7: iteration 17230/ 21553 | consumed samples: 4410880 | consumed tokens: 9033482240 | elapsed time per iteration (s): 0.30 | learning rate: 3.762E-05 | global batch size: 256 | lm loss: 3.047068E+00 | grad norm: 
0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.461 | TFLOPs: 30.33 | +7: iteration 17240/ 21553 | consumed samples: 4413440 | consumed tokens: 9038725120 | elapsed time per iteration (s): 0.30 | learning rate: 3.754E-05 | global batch size: 256 | lm loss: 3.040787E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.325 | TFLOPs: 30.33 | +7: iteration 17250/ 21553 | consumed samples: 4416000 | consumed tokens: 9043968000 | elapsed time per iteration (s): 0.30 | learning rate: 3.747E-05 | global batch size: 256 | lm loss: 3.044965E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.272 | TFLOPs: 30.33 | +7: iteration 17260/ 21553 | consumed samples: 4418560 | consumed tokens: 9049210880 | elapsed time per iteration (s): 0.30 | learning rate: 3.739E-05 | global batch size: 256 | lm loss: 3.050727E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.941 | TFLOPs: 30.35 | +7: iteration 17270/ 21553 | consumed samples: 4421120 | consumed tokens: 9054453760 | elapsed time per iteration (s): 0.30 | learning rate: 3.731E-05 | global batch size: 256 | lm loss: 3.038541E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.239 | TFLOPs: 30.32 | +7: iteration 17280/ 21553 | consumed samples: 4423680 | consumed tokens: 9059696640 | elapsed time per iteration (s): 0.30 | learning rate: 3.723E-05 | global batch size: 256 | lm loss: 3.048045E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.810 | TFLOPs: 30.31 | +7: iteration 17290/ 21553 | consumed samples: 4426240 | consumed tokens: 9064939520 | elapsed time per iteration (s): 0.30 
| learning rate: 3.715E-05 | global batch size: 256 | lm loss: 3.037336E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.458 | TFLOPs: 30.30 | +7: iteration 17300/ 21553 | consumed samples: 4428800 | consumed tokens: 9070182400 | elapsed time per iteration (s): 0.30 | learning rate: 3.708E-05 | global batch size: 256 | lm loss: 3.043217E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.554 | TFLOPs: 29.99 | +7: iteration 17310/ 21553 | consumed samples: 4431360 | consumed tokens: 9075425280 | elapsed time per iteration (s): 0.30 | learning rate: 3.700E-05 | global batch size: 256 | lm loss: 3.042463E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 857.330 | TFLOPs: 30.01 | +7: iteration 17320/ 21553 | consumed samples: 4433920 | consumed tokens: 9080668160 | elapsed time per iteration (s): 0.30 | learning rate: 3.692E-05 | global batch size: 256 | lm loss: 3.040205E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.353 | TFLOPs: 30.33 | +7: iteration 17330/ 21553 | consumed samples: 4436480 | consumed tokens: 9085911040 | elapsed time per iteration (s): 0.30 | learning rate: 3.684E-05 | global batch size: 256 | lm loss: 3.044441E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.657 | TFLOPs: 30.34 | +7: iteration 17340/ 21553 | consumed samples: 4439040 | consumed tokens: 9091153920 | elapsed time per iteration (s): 0.30 | learning rate: 3.677E-05 | global batch size: 256 | lm loss: 3.032572E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.365 | TFLOPs: 30.33 | +7: iteration 17350/ 21553 | 
consumed samples: 4441600 | consumed tokens: 9096396800 | elapsed time per iteration (s): 0.30 | learning rate: 3.669E-05 | global batch size: 256 | lm loss: 3.030160E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.741 | TFLOPs: 30.34 | +7: iteration 17360/ 21553 | consumed samples: 4444160 | consumed tokens: 9101639680 | elapsed time per iteration (s): 0.30 | learning rate: 3.661E-05 | global batch size: 256 | lm loss: 3.030872E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.651 | TFLOPs: 30.34 | +7: iteration 17370/ 21553 | consumed samples: 4446720 | consumed tokens: 9106882560 | elapsed time per iteration (s): 0.30 | learning rate: 3.654E-05 | global batch size: 256 | lm loss: 3.033254E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.688 | TFLOPs: 30.34 | +7: iteration 17380/ 21553 | consumed samples: 4449280 | consumed tokens: 9112125440 | elapsed time per iteration (s): 0.30 | learning rate: 3.646E-05 | global batch size: 256 | lm loss: 3.028268E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.706 | TFLOPs: 30.34 | +7: iteration 17390/ 21553 | consumed samples: 4451840 | consumed tokens: 9117368320 | elapsed time per iteration (s): 0.30 | learning rate: 3.638E-05 | global batch size: 256 | lm loss: 3.042235E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.741 | TFLOPs: 30.34 | +7: iteration 17400/ 21553 | consumed samples: 4454400 | consumed tokens: 9122611200 | elapsed time per iteration (s): 0.30 | learning rate: 3.631E-05 | global batch size: 256 | lm loss: 3.052576E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 866.705 | TFLOPs: 30.34 | +7: iteration 17410/ 21553 | consumed samples: 4456960 | consumed tokens: 9127854080 | elapsed time per iteration (s): 0.30 | learning rate: 3.623E-05 | global batch size: 256 | lm loss: 3.041695E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.786 | TFLOPs: 30.34 | +7: iteration 17420/ 21553 | consumed samples: 4459520 | consumed tokens: 9133096960 | elapsed time per iteration (s): 0.30 | learning rate: 3.616E-05 | global batch size: 256 | lm loss: 3.041335E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.287 | TFLOPs: 30.33 | +7: iteration 17430/ 21553 | consumed samples: 4462080 | consumed tokens: 9138339840 | elapsed time per iteration (s): 0.30 | learning rate: 3.608E-05 | global batch size: 256 | lm loss: 3.055643E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.365 | TFLOPs: 30.33 | +7: iteration 17440/ 21553 | consumed samples: 4464640 | consumed tokens: 9143582720 | elapsed time per iteration (s): 0.30 | learning rate: 3.600E-05 | global batch size: 256 | lm loss: 3.025899E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.320 | TFLOPs: 30.33 | +7: iteration 17450/ 21553 | consumed samples: 4467200 | consumed tokens: 9148825600 | elapsed time per iteration (s): 0.30 | learning rate: 3.593E-05 | global batch size: 256 | lm loss: 3.046959E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.100 | TFLOPs: 30.32 | +7: iteration 17460/ 21553 | consumed samples: 4469760 | consumed tokens: 9154068480 | elapsed time per iteration (s): 0.30 | learning rate: 3.585E-05 | global batch size: 256 | lm loss: 
3.048563E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.876 | TFLOPs: 30.31 | +7: iteration 17470/ 21553 | consumed samples: 4472320 | consumed tokens: 9159311360 | elapsed time per iteration (s): 0.30 | learning rate: 3.578E-05 | global batch size: 256 | lm loss: 3.025268E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.766 | TFLOPs: 30.34 | +7: iteration 17480/ 21553 | consumed samples: 4474880 | consumed tokens: 9164554240 | elapsed time per iteration (s): 0.30 | learning rate: 3.570E-05 | global batch size: 256 | lm loss: 3.030670E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.935 | TFLOPs: 30.31 | +7: iteration 17490/ 21553 | consumed samples: 4477440 | consumed tokens: 9169797120 | elapsed time per iteration (s): 0.30 | learning rate: 3.563E-05 | global batch size: 256 | lm loss: 3.036707E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.191 | TFLOPs: 30.29 | +7: iteration 17500/ 21553 | consumed samples: 4480000 | consumed tokens: 9175040000 | elapsed time per iteration (s): 0.30 | learning rate: 3.555E-05 | global batch size: 256 | lm loss: 3.056638E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.674 | TFLOPs: 30.30 | +7: iteration 17510/ 21553 | consumed samples: 4482560 | consumed tokens: 9180282880 | elapsed time per iteration (s): 0.30 | learning rate: 3.548E-05 | global batch size: 256 | lm loss: 3.051899E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.216 | TFLOPs: 30.32 | +7: iteration 17520/ 21553 | consumed samples: 4485120 | consumed tokens: 9185525760 | elapsed 
time per iteration (s): 0.30 | learning rate: 3.541E-05 | global batch size: 256 | lm loss: 3.048242E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.610 | TFLOPs: 30.30 | +7: iteration 17530/ 21553 | consumed samples: 4487680 | consumed tokens: 9190768640 | elapsed time per iteration (s): 0.30 | learning rate: 3.533E-05 | global batch size: 256 | lm loss: 3.027240E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.573 | TFLOPs: 30.34 | +7: iteration 17540/ 21553 | consumed samples: 4490240 | consumed tokens: 9196011520 | elapsed time per iteration (s): 0.30 | learning rate: 3.526E-05 | global batch size: 256 | lm loss: 3.033437E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.703 | TFLOPs: 30.34 | +7: iteration 17550/ 21553 | consumed samples: 4492800 | consumed tokens: 9201254400 | elapsed time per iteration (s): 0.30 | learning rate: 3.518E-05 | global batch size: 256 | lm loss: 3.047593E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.467 | TFLOPs: 30.33 | +7: iteration 17560/ 21553 | consumed samples: 4495360 | consumed tokens: 9206497280 | elapsed time per iteration (s): 0.30 | learning rate: 3.511E-05 | global batch size: 256 | lm loss: 3.045166E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.930 | TFLOPs: 30.35 | +7: iteration 17570/ 21553 | consumed samples: 4497920 | consumed tokens: 9211740160 | elapsed time per iteration (s): 0.30 | learning rate: 3.504E-05 | global batch size: 256 | lm loss: 3.043521E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.618 | TFLOPs: 30.34 | +7: 
iteration 17580/ 21553 | consumed samples: 4500480 | consumed tokens: 9216983040 | elapsed time per iteration (s): 0.30 | learning rate: 3.496E-05 | global batch size: 256 | lm loss: 3.043797E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.891 | TFLOPs: 30.28 | +7: iteration 17590/ 21553 | consumed samples: 4503040 | consumed tokens: 9222225920 | elapsed time per iteration (s): 0.30 | learning rate: 3.489E-05 | global batch size: 256 | lm loss: 3.031115E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.791 | TFLOPs: 30.31 | +7: iteration 17600/ 21553 | consumed samples: 4505600 | consumed tokens: 9227468800 | elapsed time per iteration (s): 0.30 | learning rate: 3.482E-05 | global batch size: 256 | lm loss: 3.046008E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.141 | TFLOPs: 30.08 | +7: iteration 17610/ 21553 | consumed samples: 4508160 | consumed tokens: 9232711680 | elapsed time per iteration (s): 0.30 | learning rate: 3.475E-05 | global batch size: 256 | lm loss: 3.048964E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.822 | TFLOPs: 30.31 | +7: iteration 17620/ 21553 | consumed samples: 4510720 | consumed tokens: 9237954560 | elapsed time per iteration (s): 0.30 | learning rate: 3.467E-05 | global batch size: 256 | lm loss: 3.046865E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.232 | TFLOPs: 30.32 | +7: iteration 17630/ 21553 | consumed samples: 4513280 | consumed tokens: 9243197440 | elapsed time per iteration (s): 0.30 | learning rate: 3.460E-05 | global batch size: 256 | lm loss: 3.030204E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 866.292 | TFLOPs: 30.33 | +7: iteration 17640/ 21553 | consumed samples: 4515840 | consumed tokens: 9248440320 | elapsed time per iteration (s): 0.30 | learning rate: 3.453E-05 | global batch size: 256 | lm loss: 3.028922E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.937 | TFLOPs: 30.31 | +7: iteration 17650/ 21553 | consumed samples: 4518400 | consumed tokens: 9253683200 | elapsed time per iteration (s): 0.30 | learning rate: 3.446E-05 | global batch size: 256 | lm loss: 3.045475E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.031 | TFLOPs: 30.28 | +7: iteration 17660/ 21553 | consumed samples: 4520960 | consumed tokens: 9258926080 | elapsed time per iteration (s): 0.30 | learning rate: 3.438E-05 | global batch size: 256 | lm loss: 3.044540E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.021 | TFLOPs: 30.28 | +7: iteration 17670/ 21553 | consumed samples: 4523520 | consumed tokens: 9264168960 | elapsed time per iteration (s): 0.30 | learning rate: 3.431E-05 | global batch size: 256 | lm loss: 3.036472E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.897 | TFLOPs: 30.31 | +7: iteration 17680/ 21553 | consumed samples: 4526080 | consumed tokens: 9269411840 | elapsed time per iteration (s): 0.30 | learning rate: 3.424E-05 | global batch size: 256 | lm loss: 3.032155E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.287 | TFLOPs: 30.29 | +7: iteration 17690/ 21553 | consumed samples: 4528640 | consumed tokens: 9274654720 | elapsed time per iteration (s): 0.30 | learning rate: 3.417E-05 | global batch 
size: 256 | lm loss: 3.027225E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.820 | TFLOPs: 30.31 | +7: iteration 17700/ 21553 | consumed samples: 4531200 | consumed tokens: 9279897600 | elapsed time per iteration (s): 0.30 | learning rate: 3.410E-05 | global batch size: 256 | lm loss: 3.049012E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.746 | TFLOPs: 30.27 | +7: iteration 17710/ 21553 | consumed samples: 4533760 | consumed tokens: 9285140480 | elapsed time per iteration (s): 0.30 | learning rate: 3.403E-05 | global batch size: 256 | lm loss: 3.034991E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.905 | TFLOPs: 30.31 | +7: iteration 17720/ 21553 | consumed samples: 4536320 | consumed tokens: 9290383360 | elapsed time per iteration (s): 0.30 | learning rate: 3.396E-05 | global batch size: 256 | lm loss: 3.038579E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.638 | TFLOPs: 30.30 | +7: iteration 17730/ 21553 | consumed samples: 4538880 | consumed tokens: 9295626240 | elapsed time per iteration (s): 0.30 | learning rate: 3.388E-05 | global batch size: 256 | lm loss: 3.038490E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.827 | TFLOPs: 30.31 | +7: iteration 17740/ 21553 | consumed samples: 4541440 | consumed tokens: 9300869120 | elapsed time per iteration (s): 0.30 | learning rate: 3.381E-05 | global batch size: 256 | lm loss: 3.019903E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.941 | TFLOPs: 30.31 | +7: iteration 17750/ 21553 | consumed samples: 4544000 | consumed tokens: 
9306112000 | elapsed time per iteration (s): 0.30 | learning rate: 3.374E-05 | global batch size: 256 | lm loss: 3.038936E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.588 | TFLOPs: 30.34 | +7: iteration 17760/ 21553 | consumed samples: 4546560 | consumed tokens: 9311354880 | elapsed time per iteration (s): 0.30 | learning rate: 3.367E-05 | global batch size: 256 | lm loss: 3.044214E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.769 | TFLOPs: 30.34 | +7: iteration 17770/ 21553 | consumed samples: 4549120 | consumed tokens: 9316597760 | elapsed time per iteration (s): 0.30 | learning rate: 3.360E-05 | global batch size: 256 | lm loss: 3.033753E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.708 | TFLOPs: 30.34 | +7: iteration 17780/ 21553 | consumed samples: 4551680 | consumed tokens: 9321840640 | elapsed time per iteration (s): 0.30 | learning rate: 3.353E-05 | global batch size: 256 | lm loss: 3.018426E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 848.892 | TFLOPs: 29.72 | +7: iteration 17790/ 21553 | consumed samples: 4554240 | consumed tokens: 9327083520 | elapsed time per iteration (s): 0.30 | learning rate: 3.346E-05 | global batch size: 256 | lm loss: 3.040155E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.568 | TFLOPs: 29.95 | +7: iteration 17800/ 21553 | consumed samples: 4556800 | consumed tokens: 9332326400 | elapsed time per iteration (s): 0.30 | learning rate: 3.339E-05 | global batch size: 256 | lm loss: 3.037475E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.171 | 
TFLOPs: 30.32 | +7: iteration 17810/ 21553 | consumed samples: 4559360 | consumed tokens: 9337569280 | elapsed time per iteration (s): 0.30 | learning rate: 3.332E-05 | global batch size: 256 | lm loss: 3.028853E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.829 | TFLOPs: 30.31 | +7: iteration 17820/ 21553 | consumed samples: 4561920 | consumed tokens: 9342812160 | elapsed time per iteration (s): 0.30 | learning rate: 3.326E-05 | global batch size: 256 | lm loss: 3.030807E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.202 | TFLOPs: 30.32 | +7: iteration 17830/ 21553 | consumed samples: 4564480 | consumed tokens: 9348055040 | elapsed time per iteration (s): 0.30 | learning rate: 3.319E-05 | global batch size: 256 | lm loss: 3.039270E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.954 | TFLOPs: 30.35 | +7: iteration 17840/ 21553 | consumed samples: 4567040 | consumed tokens: 9353297920 | elapsed time per iteration (s): 0.30 | learning rate: 3.312E-05 | global batch size: 256 | lm loss: 3.036196E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.457 | TFLOPs: 30.33 | +7: iteration 17850/ 21553 | consumed samples: 4569600 | consumed tokens: 9358540800 | elapsed time per iteration (s): 0.30 | learning rate: 3.305E-05 | global batch size: 256 | lm loss: 3.032024E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.447 | TFLOPs: 30.33 | +7: iteration 17860/ 21553 | consumed samples: 4572160 | consumed tokens: 9363783680 | elapsed time per iteration (s): 0.30 | learning rate: 3.298E-05 | global batch size: 256 | lm loss: 3.044245E+00 | grad norm: 0.434 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.960 | TFLOPs: 30.31 | +7: iteration 17870/ 21553 | consumed samples: 4574720 | consumed tokens: 9369026560 | elapsed time per iteration (s): 0.30 | learning rate: 3.291E-05 | global batch size: 256 | lm loss: 3.034055E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.518 | TFLOPs: 30.33 | +7: iteration 17880/ 21553 | consumed samples: 4577280 | consumed tokens: 9374269440 | elapsed time per iteration (s): 0.30 | learning rate: 3.284E-05 | global batch size: 256 | lm loss: 3.030790E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.324 | TFLOPs: 30.33 | +7: iteration 17890/ 21553 | consumed samples: 4579840 | consumed tokens: 9379512320 | elapsed time per iteration (s): 0.30 | learning rate: 3.277E-05 | global batch size: 256 | lm loss: 3.030852E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.493 | TFLOPs: 30.33 | +7: iteration 17900/ 21553 | consumed samples: 4582400 | consumed tokens: 9384755200 | elapsed time per iteration (s): 0.30 | learning rate: 3.271E-05 | global batch size: 256 | lm loss: 3.031063E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.565 | TFLOPs: 30.34 | +7: iteration 17910/ 21553 | consumed samples: 4584960 | consumed tokens: 9389998080 | elapsed time per iteration (s): 0.30 | learning rate: 3.264E-05 | global batch size: 256 | lm loss: 3.034892E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.721 | TFLOPs: 30.34 | +7: iteration 17920/ 21553 | consumed samples: 4587520 | consumed tokens: 9395240960 | elapsed time per iteration (s): 0.30 | learning rate: 
3.257E-05 | global batch size: 256 | lm loss: 3.041699E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.065 | TFLOPs: 30.35 | +7: iteration 17930/ 21553 | consumed samples: 4590080 | consumed tokens: 9400483840 | elapsed time per iteration (s): 0.30 | learning rate: 3.250E-05 | global batch size: 256 | lm loss: 3.037229E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.418 | TFLOPs: 30.33 | +7: iteration 17940/ 21553 | consumed samples: 4592640 | consumed tokens: 9405726720 | elapsed time per iteration (s): 0.30 | learning rate: 3.244E-05 | global batch size: 256 | lm loss: 3.037157E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.679 | TFLOPs: 30.34 | +7: iteration 17950/ 21553 | consumed samples: 4595200 | consumed tokens: 9410969600 | elapsed time per iteration (s): 0.30 | learning rate: 3.237E-05 | global batch size: 256 | lm loss: 3.026310E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.157 | TFLOPs: 30.32 | +7: iteration 17960/ 21553 | consumed samples: 4597760 | consumed tokens: 9416212480 | elapsed time per iteration (s): 0.30 | learning rate: 3.230E-05 | global batch size: 256 | lm loss: 3.041733E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.442 | TFLOPs: 30.33 | +7: iteration 17970/ 21553 | consumed samples: 4600320 | consumed tokens: 9421455360 | elapsed time per iteration (s): 0.30 | learning rate: 3.224E-05 | global batch size: 256 | lm loss: 3.044159E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.971 | TFLOPs: 30.00 | +7: iteration 17980/ 21553 | consumed samples: 
4602880 | consumed tokens: 9426698240 | elapsed time per iteration (s): 0.30 | learning rate: 3.217E-05 | global batch size: 256 | lm loss: 3.029692E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.584 | TFLOPs: 30.34 | +7: iteration 17990/ 21553 | consumed samples: 4605440 | consumed tokens: 9431941120 | elapsed time per iteration (s): 0.30 | learning rate: 3.210E-05 | global batch size: 256 | lm loss: 3.028636E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.357 | TFLOPs: 30.33 | +0: [2023-03-14 00:47:49,213] [INFO] [logging.py:68:log_dist] [Rank 0] step=18000, skipped=0, lr=[3.2036439682204886e-05, 3.2036439682204886e-05, 3.2036439682204886e-05], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 18000/ 21553 | consumed samples: 4608000 | consumed tokens: 9437184000 | elapsed time per iteration (s): 0.30 | learning rate: 3.204E-05 | global batch size: 256 | lm loss: 3.046060E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.710 | TFLOPs: 30.34 | +0: steps: 18000 loss: 3.0307 iter time (s): 0.294 samples/sec: 870.560 +7: ------------------------------------------------------------------------------------------------ +7: validation loss at iteration 18000 | lm loss value: 3.789407E+00 | lm loss PPL: 4.423016E+01 | +7: ------------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 18000 to checkpoints_146m14b100m +0: [2023-03-14 00:47:49,334] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step18000 is begin to save! +0: [2023-03-14 00:47:49,337] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_01-model_00-model_states.pt... 
+0: [2023-03-14 00:47:49,424] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_01-model_00-model_states.pt. +0: [2023-03-14 00:47:49,424] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_03-model_00-model_states.pt... +0: [2023-03-14 00:47:49,439] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_03-model_00-model_states.pt. +0: [2023-03-14 00:47:49,440] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_04-model_00-model_states.pt... +0: [2023-03-14 00:47:49,455] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_04-model_00-model_states.pt. +0: [2023-03-14 00:47:49,455] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_05-model_00-model_states.pt... +0: [2023-03-14 00:47:49,470] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_05-model_00-model_states.pt. +0: [2023-03-14 00:47:49,470] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_06-model_00-model_states.pt... +0: [2023-03-14 00:47:49,485] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_06-model_00-model_states.pt. +0: [2023-03-14 00:47:49,485] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_07-model_00-model_states.pt... +0: [2023-03-14 00:47:49,500] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_07-model_00-model_states.pt. +0: [2023-03-14 00:47:49,500] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_08-model_00-model_states.pt... 
+0: [2023-03-14 00:47:49,515] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_08-model_00-model_states.pt. +0: [2023-03-14 00:47:49,515] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_09-model_00-model_states.pt... +0: [2023-03-14 00:47:49,530] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_09-model_00-model_states.pt. +0: [2023-03-14 00:47:49,530] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_10-model_00-model_states.pt... +0: [2023-03-14 00:47:49,545] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_10-model_00-model_states.pt. +0: [2023-03-14 00:47:49,545] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_11-model_00-model_states.pt... +0: [2023-03-14 00:47:49,560] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_11-model_00-model_states.pt. +0: [2023-03-14 00:47:49,561] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_12-model_00-model_states.pt... +0: [2023-03-14 00:47:49,575] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_12-model_00-model_states.pt. +0: [2023-03-14 00:47:49,576] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_13-model_00-model_states.pt... +0: [2023-03-14 00:47:49,591] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_13-model_00-model_states.pt. +0: [2023-03-14 00:47:49,591] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_14-model_00-model_states.pt... 
+0: [2023-03-14 00:47:49,606] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_14-model_00-model_states.pt. +0: [2023-03-14 00:47:49,606] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_15-model_00-model_states.pt... +0: [2023-03-14 00:47:49,621] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_15-model_00-model_states.pt. +0: [2023-03-14 00:47:49,621] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_16-model_00-model_states.pt... +0: [2023-03-14 00:47:49,636] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_16-model_00-model_states.pt. +0: [2023-03-14 00:47:49,636] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_17-model_00-model_states.pt... +0: [2023-03-14 00:47:49,651] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_17-model_00-model_states.pt. +0: [2023-03-14 00:47:49,651] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/layer_19-model_00-model_states.pt... +0: [2023-03-14 00:47:49,652] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/layer_19-model_00-model_states.pt. +0: [2023-03-14 00:47:49,653] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step18000/mp_rank_00_model_states.pt +0: [2023-03-14 00:47:49,653] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/mp_rank_00_model_states.pt... +0: [2023-03-14 00:47:49,655] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/mp_rank_00_model_states.pt. 
+0: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... 
+1: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... 
+7: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... 
+4: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:47:49,672] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:47:49,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:47:49,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-14 00:47:49,705] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2023-03-14 00:47:49,706] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:47:49,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 00:47:49,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2023-03-14 00:47:49,707] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:47:49,707] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 00:47:49,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2023-03-14 00:47:49,708] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:47:49,708] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:47:49,708] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:47:49,708] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 00:47:49,708] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 00:47:49,708] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 00:47:49,708] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2023-03-14 00:47:49,708] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2023-03-14 00:47:49,708] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2023-03-14 00:47:49,708] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:47:49,708] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 00:47:49,708] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:47:49,710] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +7: [2023-03-14 00:47:49,710] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +5: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:47:49,710] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:47:49,710] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +7: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:47:49,711] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! 
+7: [2023-03-14 00:47:49,711] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 00:47:49,711] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2023-03-14 00:47:49,711] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:47:49,711] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 00:47:49,711] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2023-03-14 00:47:49,711] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:47:49,712] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:47:49,712] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:47:49,712] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2023-03-14 00:47:49,712] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:47:49,712] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:47:49,712] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:47:49,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:47:49,712] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:47:49,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:47:49,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +7: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:47:49,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2023-03-14 00:47:49,706] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:47:49,705] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:47:49,710] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +3: [2023-03-14 00:47:49,705] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +4: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +3: [2023-03-14 00:47:49,706] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:47:49,706] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:47:49,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +4: [2023-03-14 00:47:49,711] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +3: [2023-03-14 00:47:49,706] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +2: [2023-03-14 00:47:49,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2023-03-14 00:47:49,711] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +3: [2023-03-14 00:47:49,707] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:47:49,711] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:47:49,708] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:47:49,710] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! 
+4: [2023-03-14 00:47:49,712] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +3: [2023-03-14 00:47:49,708] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +2: [2023-03-14 00:47:49,711] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +3: [2023-03-14 00:47:49,708] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2023-03-14 00:47:49,711] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +4: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:47:49,711] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! 
+4: [2023-03-14 00:47:49,712] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +3: [2023-03-14 00:47:49,710] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +2: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +3: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2023-03-14 00:47:49,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +4: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! 
+4: [2023-03-14 00:47:49,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +3: [2023-03-14 00:47:49,710] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +0: [2023-03-14 00:47:49,714] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +2: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +3: [2023-03-14 00:47:49,710] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2023-03-14 00:47:49,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +4: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! 
+4: [2023-03-14 00:47:49,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3: [2023-03-14 00:47:49,712] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +2: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:47:49,712] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2023-03-14 00:47:49,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +4: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! 
+4: [2023-03-14 00:47:49,714] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +5: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:47:49,714] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +3: [2023-03-14 00:47:49,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +5: [2023-03-14 00:47:49,714] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +3: [2023-03-14 00:47:49,713] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +3: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +3: [2023-03-14 00:47:49,713] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +5: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! 
+0: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:47:49,714] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:47:49,714] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-14 00:47:49,715] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-14 00:47:49,715] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2023-03-14 00:47:49,715] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 00:47:49,715] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2023-03-14 00:47:49,715] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! 
+2: [2023-03-14 00:47:49,716] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:47:49,716] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2023-03-14 00:47:49,716] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +2: [2023-03-14 00:47:49,716] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:47:49,716] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-14 00:47:49,716] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +4: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:47:49,714] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-14 00:47:49,714] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2023-03-14 00:47:49,717] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:47:49,718] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 00:47:49,718] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2023-03-14 00:47:49,719] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:47:49,719] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 00:47:49,719] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +6: [2023-03-14 00:47:49,720] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:47:49,720] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-14 00:47:49,720] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2023-03-14 00:47:49,730] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:47:49,730] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:47:49,730] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 00:47:49,730] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 00:47:49,730] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:47:49,730] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:47:49,730] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2023-03-14 00:47:49,730] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2023-03-14 00:47:49,730] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 00:47:49,730] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-14 00:47:49,730] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:47:49,730] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +1: [2023-03-14 00:47:49,730] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! 
+1: [2023-03-14 00:47:49,730] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-14 00:47:49,730] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: [2023-03-14 00:47:49,738] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step18000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 00:47:49,738] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step18000 is ready now! +0: successfully saved checkpoint at iteration 18000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 409.76 +7: iteration 18010/ 21553 | consumed samples: 4610560 | consumed tokens: 9442426880 | elapsed time per iteration (s): 0.35 | learning rate: 3.197E-05 | global batch size: 256 | lm loss: 3.036322E+00 | grad norm: 0.445 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 737.448 | TFLOPs: 25.82 | +7: iteration 18020/ 21553 | consumed samples: 4613120 | consumed tokens: 9447669760 | elapsed time per iteration (s): 0.30 | learning rate: 3.190E-05 | global batch size: 256 | lm loss: 3.035678E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.569 | TFLOPs: 30.34 | +7: iteration 18030/ 21553 | consumed samples: 4615680 | consumed tokens: 9452912640 | elapsed time per iteration (s): 0.30 | learning rate: 3.184E-05 | global batch size: 256 | lm loss: 3.029703E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.404 | TFLOPs: 30.33 | +7: iteration 18040/ 21553 | consumed samples: 4618240 | consumed tokens: 9458155520 | elapsed time per iteration (s): 0.30 | learning rate: 3.177E-05 | global batch size: 256 | lm loss: 3.018658E+00 
| grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.139 | TFLOPs: 30.32 | +7: iteration 18050/ 21553 | consumed samples: 4620800 | consumed tokens: 9463398400 | elapsed time per iteration (s): 0.30 | learning rate: 3.171E-05 | global batch size: 256 | lm loss: 3.041102E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.531 | TFLOPs: 30.33 | +7: iteration 18060/ 21553 | consumed samples: 4623360 | consumed tokens: 9468641280 | elapsed time per iteration (s): 0.30 | learning rate: 3.164E-05 | global batch size: 256 | lm loss: 3.048845E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.717 | TFLOPs: 30.34 | +7: iteration 18070/ 21553 | consumed samples: 4625920 | consumed tokens: 9473884160 | elapsed time per iteration (s): 0.30 | learning rate: 3.158E-05 | global batch size: 256 | lm loss: 3.038477E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.098 | TFLOPs: 30.35 | +7: iteration 18080/ 21553 | consumed samples: 4628480 | consumed tokens: 9479127040 | elapsed time per iteration (s): 0.30 | learning rate: 3.151E-05 | global batch size: 256 | lm loss: 3.031525E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.569 | TFLOPs: 30.34 | +7: iteration 18090/ 21553 | consumed samples: 4631040 | consumed tokens: 9484369920 | elapsed time per iteration (s): 0.30 | learning rate: 3.145E-05 | global batch size: 256 | lm loss: 3.038260E+00 | grad norm: 0.454 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.008 | TFLOPs: 30.35 | +7: iteration 18100/ 21553 | consumed samples: 4633600 | consumed tokens: 9489612800 | elapsed time per 
iteration (s): 0.30 | learning rate: 3.138E-05 | global batch size: 256 | lm loss: 3.024182E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.514 | TFLOPs: 30.33 | +7: iteration 18110/ 21553 | consumed samples: 4636160 | consumed tokens: 9494855680 | elapsed time per iteration (s): 0.30 | learning rate: 3.132E-05 | global batch size: 256 | lm loss: 3.019160E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.680 | TFLOPs: 30.34 | +7: iteration 18120/ 21553 | consumed samples: 4638720 | consumed tokens: 9500098560 | elapsed time per iteration (s): 0.30 | learning rate: 3.125E-05 | global batch size: 256 | lm loss: 3.036572E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.799 | TFLOPs: 30.34 | +7: iteration 18130/ 21553 | consumed samples: 4641280 | consumed tokens: 9505341440 | elapsed time per iteration (s): 0.30 | learning rate: 3.119E-05 | global batch size: 256 | lm loss: 3.018182E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.792 | TFLOPs: 30.34 | +7: iteration 18140/ 21553 | consumed samples: 4643840 | consumed tokens: 9510584320 | elapsed time per iteration (s): 0.30 | learning rate: 3.113E-05 | global batch size: 256 | lm loss: 3.032963E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.392 | TFLOPs: 30.33 | +7: iteration 18150/ 21553 | consumed samples: 4646400 | consumed tokens: 9515827200 | elapsed time per iteration (s): 0.30 | learning rate: 3.106E-05 | global batch size: 256 | lm loss: 3.025327E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.742 | TFLOPs: 30.34 | +7: iteration 
18160/ 21553 | consumed samples: 4648960 | consumed tokens: 9521070080 | elapsed time per iteration (s): 0.30 | learning rate: 3.100E-05 | global batch size: 256 | lm loss: 3.022955E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.306 | TFLOPs: 30.33 | +7: iteration 18170/ 21553 | consumed samples: 4651520 | consumed tokens: 9526312960 | elapsed time per iteration (s): 0.30 | learning rate: 3.094E-05 | global batch size: 256 | lm loss: 3.040621E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.936 | TFLOPs: 30.35 | +7: iteration 18180/ 21553 | consumed samples: 4654080 | consumed tokens: 9531555840 | elapsed time per iteration (s): 0.30 | learning rate: 3.087E-05 | global batch size: 256 | lm loss: 3.021646E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.316 | TFLOPs: 30.36 | +7: iteration 18190/ 21553 | consumed samples: 4656640 | consumed tokens: 9536798720 | elapsed time per iteration (s): 0.30 | learning rate: 3.081E-05 | global batch size: 256 | lm loss: 3.041599E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.068 | TFLOPs: 30.35 | +7: iteration 18200/ 21553 | consumed samples: 4659200 | consumed tokens: 9542041600 | elapsed time per iteration (s): 0.30 | learning rate: 3.075E-05 | global batch size: 256 | lm loss: 3.041868E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.673 | TFLOPs: 30.34 | +7: iteration 18210/ 21553 | consumed samples: 4661760 | consumed tokens: 9547284480 | elapsed time per iteration (s): 0.30 | learning rate: 3.068E-05 | global batch size: 256 | lm loss: 3.042604E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | 
number of nan iterations: 0 | samples per second: 864.712 | TFLOPs: 30.27 | +7: iteration 18220/ 21553 | consumed samples: 4664320 | consumed tokens: 9552527360 | elapsed time per iteration (s): 0.30 | learning rate: 3.062E-05 | global batch size: 256 | lm loss: 3.033922E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.135 | TFLOPs: 30.32 | +7: iteration 18230/ 21553 | consumed samples: 4666880 | consumed tokens: 9557770240 | elapsed time per iteration (s): 0.30 | learning rate: 3.056E-05 | global batch size: 256 | lm loss: 3.020525E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.603 | TFLOPs: 30.30 | +7: iteration 18240/ 21553 | consumed samples: 4669440 | consumed tokens: 9563013120 | elapsed time per iteration (s): 0.30 | learning rate: 3.050E-05 | global batch size: 256 | lm loss: 3.029695E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.572 | TFLOPs: 30.30 | +7: iteration 18250/ 21553 | consumed samples: 4672000 | consumed tokens: 9568256000 | elapsed time per iteration (s): 0.30 | learning rate: 3.043E-05 | global batch size: 256 | lm loss: 3.039893E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.605 | TFLOPs: 30.30 | +7: iteration 18260/ 21553 | consumed samples: 4674560 | consumed tokens: 9573498880 | elapsed time per iteration (s): 0.30 | learning rate: 3.037E-05 | global batch size: 256 | lm loss: 3.020023E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.060 | TFLOPs: 30.32 | +7: iteration 18270/ 21553 | consumed samples: 4677120 | consumed tokens: 9578741760 | elapsed time per iteration (s): 0.30 | learning rate: 3.031E-05 | global batch size: 256 | lm 
loss: 3.029306E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.139 | TFLOPs: 30.32 | +7: iteration 18280/ 21553 | consumed samples: 4679680 | consumed tokens: 9583984640 | elapsed time per iteration (s): 0.30 | learning rate: 3.025E-05 | global batch size: 256 | lm loss: 3.031481E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.658 | TFLOPs: 30.30 | +7: iteration 18290/ 21553 | consumed samples: 4682240 | consumed tokens: 9589227520 | elapsed time per iteration (s): 0.30 | learning rate: 3.019E-05 | global batch size: 256 | lm loss: 3.017467E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.398 | TFLOPs: 30.30 | +7: iteration 18300/ 21553 | consumed samples: 4684800 | consumed tokens: 9594470400 | elapsed time per iteration (s): 0.30 | learning rate: 3.013E-05 | global batch size: 256 | lm loss: 3.037125E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.311 | TFLOPs: 30.29 | +7: iteration 18310/ 21553 | consumed samples: 4687360 | consumed tokens: 9599713280 | elapsed time per iteration (s): 0.30 | learning rate: 3.007E-05 | global batch size: 256 | lm loss: 3.027229E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.274 | TFLOPs: 30.29 | +7: iteration 18320/ 21553 | consumed samples: 4689920 | consumed tokens: 9604956160 | elapsed time per iteration (s): 0.30 | learning rate: 3.001E-05 | global batch size: 256 | lm loss: 3.038723E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 847.598 | TFLOPs: 29.67 | +7: iteration 18330/ 21553 | consumed samples: 4692480 | consumed tokens: 9610199040 | 
elapsed time per iteration (s): 0.30 | learning rate: 2.994E-05 | global batch size: 256 | lm loss: 3.025521E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.006 | TFLOPs: 30.32 | +7: iteration 18340/ 21553 | consumed samples: 4695040 | consumed tokens: 9615441920 | elapsed time per iteration (s): 0.30 | learning rate: 2.988E-05 | global batch size: 256 | lm loss: 3.023781E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.231 | TFLOPs: 30.32 | +7: iteration 18350/ 21553 | consumed samples: 4697600 | consumed tokens: 9620684800 | elapsed time per iteration (s): 0.30 | learning rate: 2.982E-05 | global batch size: 256 | lm loss: 3.037547E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.635 | TFLOPs: 30.30 | +7: iteration 18360/ 21553 | consumed samples: 4700160 | consumed tokens: 9625927680 | elapsed time per iteration (s): 0.30 | learning rate: 2.976E-05 | global batch size: 256 | lm loss: 3.029408E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.978 | TFLOPs: 30.35 | +7: iteration 18370/ 21553 | consumed samples: 4702720 | consumed tokens: 9631170560 | elapsed time per iteration (s): 0.30 | learning rate: 2.970E-05 | global batch size: 256 | lm loss: 3.040545E+00 | grad norm: 0.408 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.497 | TFLOPs: 30.37 | +7: iteration 18380/ 21553 | consumed samples: 4705280 | consumed tokens: 9636413440 | elapsed time per iteration (s): 0.30 | learning rate: 2.964E-05 | global batch size: 256 | lm loss: 3.028973E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.379 | TFLOPs: 30.36 
| +7: iteration 18390/ 21553 | consumed samples: 4707840 | consumed tokens: 9641656320 | elapsed time per iteration (s): 0.30 | learning rate: 2.958E-05 | global batch size: 256 | lm loss: 3.031858E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.333 | TFLOPs: 30.36 | +7: iteration 18400/ 21553 | consumed samples: 4710400 | consumed tokens: 9646899200 | elapsed time per iteration (s): 0.29 | learning rate: 2.953E-05 | global batch size: 256 | lm loss: 3.016984E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.859 | TFLOPs: 30.38 | +7: iteration 18410/ 21553 | consumed samples: 4712960 | consumed tokens: 9652142080 | elapsed time per iteration (s): 0.30 | learning rate: 2.947E-05 | global batch size: 256 | lm loss: 3.022057E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.167 | TFLOPs: 30.36 | +7: iteration 18420/ 21553 | consumed samples: 4715520 | consumed tokens: 9657384960 | elapsed time per iteration (s): 0.30 | learning rate: 2.941E-05 | global batch size: 256 | lm loss: 3.026342E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.072 | TFLOPs: 30.35 | +7: iteration 18430/ 21553 | consumed samples: 4718080 | consumed tokens: 9662627840 | elapsed time per iteration (s): 0.30 | learning rate: 2.935E-05 | global batch size: 256 | lm loss: 3.023218E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.789 | TFLOPs: 30.34 | +7: iteration 18440/ 21553 | consumed samples: 4720640 | consumed tokens: 9667870720 | elapsed time per iteration (s): 0.30 | learning rate: 2.929E-05 | global batch size: 256 | lm loss: 3.017051E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped 
iterations: 0 | number of nan iterations: 0 | samples per second: 867.465 | TFLOPs: 30.37 | +7: iteration 18450/ 21553 | consumed samples: 4723200 | consumed tokens: 9673113600 | elapsed time per iteration (s): 0.30 | learning rate: 2.923E-05 | global batch size: 256 | lm loss: 3.025870E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.593 | TFLOPs: 30.37 | +7: iteration 18460/ 21553 | consumed samples: 4725760 | consumed tokens: 9678356480 | elapsed time per iteration (s): 0.30 | learning rate: 2.917E-05 | global batch size: 256 | lm loss: 3.038646E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.217 | TFLOPs: 30.36 | +7: iteration 18470/ 21553 | consumed samples: 4728320 | consumed tokens: 9683599360 | elapsed time per iteration (s): 0.30 | learning rate: 2.911E-05 | global batch size: 256 | lm loss: 3.047448E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.289 | TFLOPs: 30.36 | +7: iteration 18480/ 21553 | consumed samples: 4730880 | consumed tokens: 9688842240 | elapsed time per iteration (s): 0.30 | learning rate: 2.906E-05 | global batch size: 256 | lm loss: 3.017587E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.231 | TFLOPs: 30.36 | +7: iteration 18490/ 21553 | consumed samples: 4733440 | consumed tokens: 9694085120 | elapsed time per iteration (s): 0.30 | learning rate: 2.900E-05 | global batch size: 256 | lm loss: 3.030746E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 854.806 | TFLOPs: 29.92 | +7: iteration 18500/ 21553 | consumed samples: 4736000 | consumed tokens: 9699328000 | elapsed time per iteration (s): 0.30 | learning rate: 2.894E-05 | global batch 
size: 256 | lm loss: 3.020288E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.396 | TFLOPs: 30.37 | +7: iteration 18510/ 21553 | consumed samples: 4738560 | consumed tokens: 9704570880 | elapsed time per iteration (s): 0.30 | learning rate: 2.888E-05 | global batch size: 256 | lm loss: 3.028590E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.797 | TFLOPs: 30.34 | +7: iteration 18520/ 21553 | consumed samples: 4741120 | consumed tokens: 9709813760 | elapsed time per iteration (s): 0.30 | learning rate: 2.883E-05 | global batch size: 256 | lm loss: 3.024704E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.623 | TFLOPs: 30.27 | +7: iteration 18530/ 21553 | consumed samples: 4743680 | consumed tokens: 9715056640 | elapsed time per iteration (s): 0.30 | learning rate: 2.877E-05 | global batch size: 256 | lm loss: 3.027055E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.895 | TFLOPs: 30.31 | +7: iteration 18540/ 21553 | consumed samples: 4746240 | consumed tokens: 9720299520 | elapsed time per iteration (s): 0.30 | learning rate: 2.871E-05 | global batch size: 256 | lm loss: 3.012725E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.219 | TFLOPs: 30.36 | +7: iteration 18550/ 21553 | consumed samples: 4748800 | consumed tokens: 9725542400 | elapsed time per iteration (s): 0.30 | learning rate: 2.865E-05 | global batch size: 256 | lm loss: 3.021031E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.749 | TFLOPs: 30.34 | +7: iteration 18560/ 21553 | consumed samples: 4751360 | consumed tokens: 
9730785280 | elapsed time per iteration (s): 0.30 | learning rate: 2.860E-05 | global batch size: 256 | lm loss: 3.025090E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.647 | TFLOPs: 30.30 | +7: iteration 18570/ 21553 | consumed samples: 4753920 | consumed tokens: 9736028160 | elapsed time per iteration (s): 0.30 | learning rate: 2.854E-05 | global batch size: 256 | lm loss: 3.010120E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.003 | TFLOPs: 30.28 | +7: iteration 18580/ 21553 | consumed samples: 4756480 | consumed tokens: 9741271040 | elapsed time per iteration (s): 0.30 | learning rate: 2.849E-05 | global batch size: 256 | lm loss: 3.018224E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 851.034 | TFLOPs: 29.79 | +7: iteration 18590/ 21553 | consumed samples: 4759040 | consumed tokens: 9746513920 | elapsed time per iteration (s): 0.30 | learning rate: 2.843E-05 | global batch size: 256 | lm loss: 3.029184E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.366 | TFLOPs: 30.36 | +7: iteration 18600/ 21553 | consumed samples: 4761600 | consumed tokens: 9751756800 | elapsed time per iteration (s): 0.30 | learning rate: 2.837E-05 | global batch size: 256 | lm loss: 3.021580E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.371 | TFLOPs: 30.33 | +7: iteration 18610/ 21553 | consumed samples: 4764160 | consumed tokens: 9756999680 | elapsed time per iteration (s): 0.30 | learning rate: 2.832E-05 | global batch size: 256 | lm loss: 3.018216E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.906 | 
TFLOPs: 30.35 | +7: iteration 18620/ 21553 | consumed samples: 4766720 | consumed tokens: 9762242560 | elapsed time per iteration (s): 0.30 | learning rate: 2.826E-05 | global batch size: 256 | lm loss: 3.025540E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.971 | TFLOPs: 30.28 | +7: iteration 18630/ 21553 | consumed samples: 4769280 | consumed tokens: 9767485440 | elapsed time per iteration (s): 0.30 | learning rate: 2.821E-05 | global batch size: 256 | lm loss: 3.031897E+00 | grad norm: 0.417 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.400 | TFLOPs: 30.33 | +7: iteration 18640/ 21553 | consumed samples: 4771840 | consumed tokens: 9772728320 | elapsed time per iteration (s): 0.30 | learning rate: 2.815E-05 | global batch size: 256 | lm loss: 3.013198E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.898 | TFLOPs: 30.35 | +7: iteration 18650/ 21553 | consumed samples: 4774400 | consumed tokens: 9777971200 | elapsed time per iteration (s): 0.30 | learning rate: 2.810E-05 | global batch size: 256 | lm loss: 3.036149E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.369 | TFLOPs: 30.29 | +7: iteration 18660/ 21553 | consumed samples: 4776960 | consumed tokens: 9783214080 | elapsed time per iteration (s): 0.30 | learning rate: 2.804E-05 | global batch size: 256 | lm loss: 3.028241E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.651 | TFLOPs: 30.34 | +7: iteration 18670/ 21553 | consumed samples: 4779520 | consumed tokens: 9788456960 | elapsed time per iteration (s): 0.30 | learning rate: 2.799E-05 | global batch size: 256 | lm loss: 3.020423E+00 | grad norm: 0.436 | num zeros: 0.0 | 
number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.874 | TFLOPs: 30.31 | +7: iteration 18680/ 21553 | consumed samples: 4782080 | consumed tokens: 9793699840 | elapsed time per iteration (s): 0.30 | learning rate: 2.793E-05 | global batch size: 256 | lm loss: 3.017456E+00 | grad norm: 0.454 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.558 | TFLOPs: 30.34 | +7: iteration 18690/ 21553 | consumed samples: 4784640 | consumed tokens: 9798942720 | elapsed time per iteration (s): 0.30 | learning rate: 2.788E-05 | global batch size: 256 | lm loss: 3.014292E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.845 | TFLOPs: 30.31 | +7: iteration 18700/ 21553 | consumed samples: 4787200 | consumed tokens: 9804185600 | elapsed time per iteration (s): 0.30 | learning rate: 2.782E-05 | global batch size: 256 | lm loss: 3.025667E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.972 | TFLOPs: 30.28 | +7: iteration 18710/ 21553 | consumed samples: 4789760 | consumed tokens: 9809428480 | elapsed time per iteration (s): 0.30 | learning rate: 2.777E-05 | global batch size: 256 | lm loss: 3.035688E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.367 | TFLOPs: 30.33 | +7: iteration 18720/ 21553 | consumed samples: 4792320 | consumed tokens: 9814671360 | elapsed time per iteration (s): 0.30 | learning rate: 2.772E-05 | global batch size: 256 | lm loss: 3.028491E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.492 | TFLOPs: 30.33 | +7: iteration 18730/ 21553 | consumed samples: 4794880 | consumed tokens: 9819914240 | elapsed time per iteration (s): 0.30 | learning rate: 
2.766E-05 | global batch size: 256 | lm loss: 3.026001E+00 | grad norm: 0.449 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.748 | TFLOPs: 30.34 | +7: iteration 18740/ 21553 | consumed samples: 4797440 | consumed tokens: 9825157120 | elapsed time per iteration (s): 0.30 | learning rate: 2.761E-05 | global batch size: 256 | lm loss: 3.019640E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.405 | TFLOPs: 30.30 | +7: iteration 18750/ 21553 | consumed samples: 4800000 | consumed tokens: 9830400000 | elapsed time per iteration (s): 0.30 | learning rate: 2.756E-05 | global batch size: 256 | lm loss: 3.017294E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.260 | TFLOPs: 30.33 | +7: iteration 18760/ 21553 | consumed samples: 4802560 | consumed tokens: 9835642880 | elapsed time per iteration (s): 0.30 | learning rate: 2.750E-05 | global batch size: 256 | lm loss: 3.034373E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.760 | TFLOPs: 30.34 | +7: iteration 18770/ 21553 | consumed samples: 4805120 | consumed tokens: 9840885760 | elapsed time per iteration (s): 0.30 | learning rate: 2.745E-05 | global batch size: 256 | lm loss: 3.029840E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.169 | TFLOPs: 30.36 | +7: iteration 18780/ 21553 | consumed samples: 4807680 | consumed tokens: 9846128640 | elapsed time per iteration (s): 0.30 | learning rate: 2.740E-05 | global batch size: 256 | lm loss: 3.030929E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.550 | TFLOPs: 30.34 | +7: iteration 18790/ 21553 | consumed samples: 
4810240 | consumed tokens: 9851371520 | elapsed time per iteration (s): 0.30 | learning rate: 2.735E-05 | global batch size: 256 | lm loss: 3.025330E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.599 | TFLOPs: 30.34 | +7: iteration 18800/ 21553 | consumed samples: 4812800 | consumed tokens: 9856614400 | elapsed time per iteration (s): 0.30 | learning rate: 2.729E-05 | global batch size: 256 | lm loss: 3.024895E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.275 | TFLOPs: 30.36 | +7: iteration 18810/ 21553 | consumed samples: 4815360 | consumed tokens: 9861857280 | elapsed time per iteration (s): 0.30 | learning rate: 2.724E-05 | global batch size: 256 | lm loss: 3.023001E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.195 | TFLOPs: 30.32 | +7: iteration 18820/ 21553 | consumed samples: 4817920 | consumed tokens: 9867100160 | elapsed time per iteration (s): 0.30 | learning rate: 2.719E-05 | global batch size: 256 | lm loss: 3.022760E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.205 | TFLOPs: 30.32 | +7: iteration 18830/ 21553 | consumed samples: 4820480 | consumed tokens: 9872343040 | elapsed time per iteration (s): 0.30 | learning rate: 2.714E-05 | global batch size: 256 | lm loss: 3.018627E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.511 | TFLOPs: 30.33 | +7: iteration 18840/ 21553 | consumed samples: 4823040 | consumed tokens: 9877585920 | elapsed time per iteration (s): 0.30 | learning rate: 2.709E-05 | global batch size: 256 | lm loss: 3.014348E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | 
samples per second: 866.531 | TFLOPs: 30.33 | +7: iteration 18850/ 21553 | consumed samples: 4825600 | consumed tokens: 9882828800 | elapsed time per iteration (s): 0.30 | learning rate: 2.703E-05 | global batch size: 256 | lm loss: 3.027910E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.447 | TFLOPs: 30.30 | +7: iteration 18860/ 21553 | consumed samples: 4828160 | consumed tokens: 9888071680 | elapsed time per iteration (s): 0.30 | learning rate: 2.698E-05 | global batch size: 256 | lm loss: 3.018581E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.055 | TFLOPs: 30.32 | +7: iteration 18870/ 21553 | consumed samples: 4830720 | consumed tokens: 9893314560 | elapsed time per iteration (s): 0.30 | learning rate: 2.693E-05 | global batch size: 256 | lm loss: 3.019397E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.426 | TFLOPs: 30.30 | +7: iteration 18880/ 21553 | consumed samples: 4833280 | consumed tokens: 9898557440 | elapsed time per iteration (s): 0.30 | learning rate: 2.688E-05 | global batch size: 256 | lm loss: 3.020102E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.499 | TFLOPs: 30.30 | +7: iteration 18890/ 21553 | consumed samples: 4835840 | consumed tokens: 9903800320 | elapsed time per iteration (s): 0.30 | learning rate: 2.683E-05 | global batch size: 256 | lm loss: 3.021596E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.306 | TFLOPs: 30.29 | +7: iteration 18900/ 21553 | consumed samples: 4838400 | consumed tokens: 9909043200 | elapsed time per iteration (s): 0.31 | learning rate: 2.678E-05 | global batch size: 256 | lm loss: 3.023660E+00 | grad norm: 
0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 830.217 | TFLOPs: 29.06 | +7: iteration 18910/ 21553 | consumed samples: 4840960 | consumed tokens: 9914286080 | elapsed time per iteration (s): 0.30 | learning rate: 2.673E-05 | global batch size: 256 | lm loss: 3.017070E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 855.007 | TFLOPs: 29.93 | +7: iteration 18920/ 21553 | consumed samples: 4843520 | consumed tokens: 9919528960 | elapsed time per iteration (s): 0.30 | learning rate: 2.668E-05 | global batch size: 256 | lm loss: 3.024646E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.188 | TFLOPs: 30.32 | +7: iteration 18930/ 21553 | consumed samples: 4846080 | consumed tokens: 9924771840 | elapsed time per iteration (s): 0.30 | learning rate: 2.663E-05 | global batch size: 256 | lm loss: 3.026323E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.659 | TFLOPs: 30.34 | +7: iteration 18940/ 21553 | consumed samples: 4848640 | consumed tokens: 9930014720 | elapsed time per iteration (s): 0.30 | learning rate: 2.658E-05 | global batch size: 256 | lm loss: 3.017928E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.460 | TFLOPs: 30.33 | +7: iteration 18950/ 21553 | consumed samples: 4851200 | consumed tokens: 9935257600 | elapsed time per iteration (s): 0.30 | learning rate: 2.653E-05 | global batch size: 256 | lm loss: 3.014977E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.842 | TFLOPs: 30.31 | +7: iteration 18960/ 21553 | consumed samples: 4853760 | consumed tokens: 9940500480 | elapsed time per iteration (s): 0.30 
| learning rate: 2.648E-05 | global batch size: 256 | lm loss: 3.026933E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.157 | TFLOPs: 30.32 | +7: iteration 18970/ 21553 | consumed samples: 4856320 | consumed tokens: 9945743360 | elapsed time per iteration (s): 0.30 | learning rate: 2.643E-05 | global batch size: 256 | lm loss: 3.014775E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.706 | TFLOPs: 30.31 | +7: iteration 18980/ 21553 | consumed samples: 4858880 | consumed tokens: 9950986240 | elapsed time per iteration (s): 0.30 | learning rate: 2.638E-05 | global batch size: 256 | lm loss: 3.027470E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.373 | TFLOPs: 30.29 | +7: iteration 18990/ 21553 | consumed samples: 4861440 | consumed tokens: 9956229120 | elapsed time per iteration (s): 0.30 | learning rate: 2.633E-05 | global batch size: 256 | lm loss: 3.021199E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.921 | TFLOPs: 30.00 | +7: iteration 19000/ 21553 | consumed samples: 4864000 | consumed tokens: 9961472000 | elapsed time per iteration (s): 0.30 | learning rate: 2.628E-05 | global batch size: 256 | lm loss: 3.014264E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.710 | TFLOPs: 30.31 | +7: ------------------------------------------------------------------------------------------------ +7: validation loss at iteration 19000 | lm loss value: 3.753428E+00 | lm loss PPL: 4.266710E+01 | +7: ------------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 19000 to checkpoints_146m14b100m +0: 
[2023-03-14 00:52:45,681] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step19000 is begin to save! +0: [2023-03-14 00:52:45,684] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_01-model_00-model_states.pt... +0: [2023-03-14 00:52:45,772] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_01-model_00-model_states.pt. +0: [2023-03-14 00:52:45,773] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_03-model_00-model_states.pt... +0: [2023-03-14 00:52:45,790] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_03-model_00-model_states.pt. +0: [2023-03-14 00:52:45,790] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_04-model_00-model_states.pt... +0: [2023-03-14 00:52:45,805] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_04-model_00-model_states.pt. +0: [2023-03-14 00:52:45,805] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_05-model_00-model_states.pt... +0: [2023-03-14 00:52:45,820] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_05-model_00-model_states.pt. +0: [2023-03-14 00:52:45,820] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_06-model_00-model_states.pt... +0: [2023-03-14 00:52:45,835] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_06-model_00-model_states.pt. +0: [2023-03-14 00:52:45,835] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_07-model_00-model_states.pt... 
+0: [2023-03-14 00:52:45,850] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_07-model_00-model_states.pt. +0: [2023-03-14 00:52:45,850] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_08-model_00-model_states.pt... +0: [2023-03-14 00:52:45,865] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_08-model_00-model_states.pt. +0: [2023-03-14 00:52:45,865] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_09-model_00-model_states.pt... +0: [2023-03-14 00:52:45,880] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_09-model_00-model_states.pt. +0: [2023-03-14 00:52:45,880] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_10-model_00-model_states.pt... +0: [2023-03-14 00:52:45,895] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_10-model_00-model_states.pt. +0: [2023-03-14 00:52:45,895] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_11-model_00-model_states.pt... +0: [2023-03-14 00:52:45,910] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_11-model_00-model_states.pt. +0: [2023-03-14 00:52:45,910] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_12-model_00-model_states.pt... +0: [2023-03-14 00:52:45,925] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_12-model_00-model_states.pt. +0: [2023-03-14 00:52:45,925] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_13-model_00-model_states.pt... 
+0: [2023-03-14 00:52:45,940] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_13-model_00-model_states.pt. +0: [2023-03-14 00:52:45,940] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_14-model_00-model_states.pt... +0: [2023-03-14 00:52:45,955] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_14-model_00-model_states.pt. +0: [2023-03-14 00:52:45,956] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_15-model_00-model_states.pt... +0: [2023-03-14 00:52:45,970] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_15-model_00-model_states.pt. +0: [2023-03-14 00:52:45,971] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_16-model_00-model_states.pt... +0: [2023-03-14 00:52:45,986] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_16-model_00-model_states.pt. +0: [2023-03-14 00:52:45,986] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_17-model_00-model_states.pt... +0: [2023-03-14 00:52:46,001] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_17-model_00-model_states.pt. +0: [2023-03-14 00:52:46,001] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/layer_19-model_00-model_states.pt... +0: [2023-03-14 00:52:46,002] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/layer_19-model_00-model_states.pt. 
+0: [2023-03-14 00:52:46,003] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step19000/mp_rank_00_model_states.pt +0: [2023-03-14 00:52:46,003] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/mp_rank_00_model_states.pt... +0: [2023-03-14 00:52:46,005] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/mp_rank_00_model_states.pt. +0: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... 
+1: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... 
+7: [2023-03-14 00:52:46,021] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:52:46,055] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:52:46,055] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 00:52:46,055] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2023-03-14 00:52:46,055] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:52:46,055] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-14 00:52:46,055] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2023-03-14 00:52:46,055] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:52:46,055] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:52:46,056] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-14 00:52:46,056] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! 
+7: [2023-03-14 00:52:46,056] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 00:52:46,056] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2023-03-14 00:52:46,057] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:52:46,057] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-14 00:52:46,057] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2023-03-14 00:52:46,057] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:52:46,057] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:52:46,057] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 00:52:46,057] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2023-03-14 00:52:46,058] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:52:46,059] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 00:52:46,059] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! 
+7: [2023-03-14 00:52:46,059] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:52:46,060] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 00:52:46,060] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +6: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:52:46,061] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:52:46,061] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +6: [2023-03-14 00:52:46,061] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +7: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +6: [2023-03-14 00:52:46,062] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! 
+6: [2023-03-14 00:52:46,062] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:52:46,062] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-14 00:52:46,062] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2023-03-14 00:52:46,063] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:52:46,063] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:52:46,063] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-14 00:52:46,063] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:52:46,063] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 00:52:46,063] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2023-03-14 00:52:46,063] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +7: [2023-03-14 00:52:46,063] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-14 00:52:46,063] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! 
+6: [2023-03-14 00:52:46,064] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:52:46,064] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 00:52:46,064] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:52:46,065] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:52:46,065] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2023-03-14 00:52:46,066] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:52:46,066] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 00:52:46,066] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! 
+1: [2023-03-14 00:52:46,066] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:52:46,066] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:52:46,066] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:52:46,066] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:52:46,066] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 00:52:46,066] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 00:52:46,066] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 00:52:46,066] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 00:52:46,067] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2023-03-14 00:52:46,067] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2023-03-14 00:52:46,067] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +1: [2023-03-14 00:52:46,067] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! 
+2: [2023-03-14 00:52:46,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:52:46,056] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:52:46,055] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:52:46,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +3: [2023-03-14 00:52:46,055] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +2: [2023-03-14 00:52:46,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2023-03-14 00:52:46,055] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2023-03-14 00:52:46,058] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:52:46,058] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:52:46,058] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +3: [2023-03-14 00:52:46,058] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +2: [2023-03-14 00:52:46,058] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2023-03-14 00:52:46,058] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2023-03-14 00:52:46,059] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:52:46,059] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:52:46,059] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3: [2023-03-14 00:52:46,059] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +2: [2023-03-14 00:52:46,059] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2023-03-14 00:52:46,059] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:52:46,061] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +3: [2023-03-14 00:52:46,061] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +2: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:52:46,061] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +3: [2023-03-14 00:52:46,061] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +2: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2023-03-14 00:52:46,061] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2023-03-14 00:52:46,062] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:52:46,064] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:52:46,063] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +3: [2023-03-14 00:52:46,064] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +2: [2023-03-14 00:52:46,063] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2023-03-14 00:52:46,064] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2023-03-14 00:52:46,064] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:52:46,064] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:52:46,064] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3: [2023-03-14 00:52:46,064] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +2: [2023-03-14 00:52:46,064] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:52:46,064] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +2: [2023-03-14 00:52:46,064] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! 
+2: [2023-03-14 00:52:46,064] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3: [2023-03-14 00:52:46,067] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:52:46,064] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +3: [2023-03-14 00:52:46,067] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2023-03-14 00:52:46,067] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2023-03-14 00:52:46,068] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:52:46,068] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:52:46,068] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:52:46,068] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:52:46,068] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-14 00:52:46,068] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 00:52:46,068] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-14 00:52:46,068] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2023-03-14 00:52:46,068] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 00:52:46,068] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2023-03-14 00:52:46,068] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2023-03-14 00:52:46,068] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2023-03-14 00:52:46,068] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:52:46,068] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-14 00:52:46,068] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2023-03-14 00:52:46,068] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:52:46,068] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 00:52:46,069] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +6: [2023-03-14 00:52:46,069] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:52:46,069] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:52:46,070] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 00:52:46,070] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 00:52:46,070] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +6: [2023-03-14 00:52:46,070] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2023-03-14 00:52:46,062] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:52:46,062] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 00:52:46,062] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2023-03-14 00:52:46,062] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:52:46,062] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 00:52:46,062] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2023-03-14 00:52:46,062] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:52:46,062] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 00:52:46,062] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:52:46,065] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! 
+5: [2023-03-14 00:52:46,065] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-14 00:52:46,065] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 00:52:46,065] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +5: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:52:46,065] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-14 00:52:46,065] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2023-03-14 00:52:46,056] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-14 00:52:46,056] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2023-03-14 00:52:46,067] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:52:46,067] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-14 00:52:46,067] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2023-03-14 00:52:46,067] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:52:46,067] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-14 00:52:46,067] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2023-03-14 00:52:46,067] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:52:46,067] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-14 00:52:46,067] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2023-03-14 00:52:46,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:52:46,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:52:46,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +4: [2023-03-14 00:52:46,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:52:46,073] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:52:46,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-14 00:52:46,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2023-03-14 00:52:46,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2023-03-14 00:52:46,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-14 00:52:46,073] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-14 00:52:46,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +4: [2023-03-14 00:52:46,073] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! +0: [2023-03-14 00:52:46,084] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step19000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 00:52:46,084] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step19000 is ready now! 
+0: successfully saved checkpoint at iteration 19000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 407.33 +7: iteration 19010/ 21553 | consumed samples: 4866560 | consumed tokens: 9966714880 | elapsed time per iteration (s): 0.35 | learning rate: 2.624E-05 | global batch size: 256 | lm loss: 3.034846E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 736.715 | TFLOPs: 25.79 | +7: iteration 19020/ 21553 | consumed samples: 4869120 | consumed tokens: 9971957760 | elapsed time per iteration (s): 0.30 | learning rate: 2.619E-05 | global batch size: 256 | lm loss: 3.020688E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.355 | TFLOPs: 30.33 | +7: iteration 19030/ 21553 | consumed samples: 4871680 | consumed tokens: 9977200640 | elapsed time per iteration (s): 0.30 | learning rate: 2.614E-05 | global batch size: 256 | lm loss: 3.027797E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.668 | TFLOPs: 30.09 | +7: iteration 19040/ 21553 | consumed samples: 4874240 | consumed tokens: 9982443520 | elapsed time per iteration (s): 0.30 | learning rate: 2.609E-05 | global batch size: 256 | lm loss: 3.025375E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.750 | TFLOPs: 30.34 | +7: iteration 19050/ 21553 | consumed samples: 4876800 | consumed tokens: 9987686400 | elapsed time per iteration (s): 0.30 | learning rate: 2.604E-05 | global batch size: 256 | lm loss: 3.016748E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.799 | TFLOPs: 30.31 | +7: iteration 19060/ 21553 | consumed samples: 4879360 | consumed tokens: 9992929280 | elapsed time per iteration (s): 0.30 | learning 
rate: 2.600E-05 | global batch size: 256 | lm loss: 3.026236E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.639 | TFLOPs: 30.30 | +7: iteration 19070/ 21553 | consumed samples: 4881920 | consumed tokens: 9998172160 | elapsed time per iteration (s): 0.30 | learning rate: 2.595E-05 | global batch size: 256 | lm loss: 3.028069E+00 | grad norm: 0.413 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.581 | TFLOPs: 30.30 | +7: iteration 19080/ 21553 | consumed samples: 4884480 | consumed tokens: 10003415040 | elapsed time per iteration (s): 0.30 | learning rate: 2.590E-05 | global batch size: 256 | lm loss: 3.024536E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.411 | TFLOPs: 30.12 | +7: iteration 19090/ 21553 | consumed samples: 4887040 | consumed tokens: 10008657920 | elapsed time per iteration (s): 0.30 | learning rate: 2.585E-05 | global batch size: 256 | lm loss: 3.019834E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 864.489 | TFLOPs: 30.26 | +7: iteration 19100/ 21553 | consumed samples: 4889600 | consumed tokens: 10013900800 | elapsed time per iteration (s): 0.30 | learning rate: 2.581E-05 | global batch size: 256 | lm loss: 3.024158E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.067 | TFLOPs: 30.32 | +7: iteration 19110/ 21553 | consumed samples: 4892160 | consumed tokens: 10019143680 | elapsed time per iteration (s): 0.30 | learning rate: 2.576E-05 | global batch size: 256 | lm loss: 3.044031E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.249 | TFLOPs: 29.97 | +7: iteration 19120/ 21553 | consumed 
samples: 4894720 | consumed tokens: 10024386560 | elapsed time per iteration (s): 0.30 | learning rate: 2.571E-05 | global batch size: 256 | lm loss: 3.015189E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.486 | TFLOPs: 30.16 | +7: iteration 19130/ 21553 | consumed samples: 4897280 | consumed tokens: 10029629440 | elapsed time per iteration (s): 0.30 | learning rate: 2.567E-05 | global batch size: 256 | lm loss: 3.024142E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 862.035 | TFLOPs: 30.18 | +7: iteration 19140/ 21553 | consumed samples: 4899840 | consumed tokens: 10034872320 | elapsed time per iteration (s): 0.30 | learning rate: 2.562E-05 | global batch size: 256 | lm loss: 3.023312E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.208 | TFLOPs: 30.32 | +7: iteration 19150/ 21553 | consumed samples: 4902400 | consumed tokens: 10040115200 | elapsed time per iteration (s): 0.30 | learning rate: 2.557E-05 | global batch size: 256 | lm loss: 3.031889E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 859.637 | TFLOPs: 30.09 | +7: iteration 19160/ 21553 | consumed samples: 4904960 | consumed tokens: 10045358080 | elapsed time per iteration (s): 0.30 | learning rate: 2.553E-05 | global batch size: 256 | lm loss: 3.020235E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.920 | TFLOPs: 30.35 | +7: iteration 19170/ 21553 | consumed samples: 4907520 | consumed tokens: 10050600960 | elapsed time per iteration (s): 0.30 | learning rate: 2.548E-05 | global batch size: 256 | lm loss: 3.030671E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan 
iterations: 0 | samples per second: 867.024 | TFLOPs: 30.35 | +7: iteration 19180/ 21553 | consumed samples: 4910080 | consumed tokens: 10055843840 | elapsed time per iteration (s): 0.30 | learning rate: 2.544E-05 | global batch size: 256 | lm loss: 3.018348E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.484 | TFLOPs: 30.33 | +7: iteration 19190/ 21553 | consumed samples: 4912640 | consumed tokens: 10061086720 | elapsed time per iteration (s): 0.30 | learning rate: 2.539E-05 | global batch size: 256 | lm loss: 3.029696E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.907 | TFLOPs: 30.31 | +7: iteration 19200/ 21553 | consumed samples: 4915200 | consumed tokens: 10066329600 | elapsed time per iteration (s): 0.30 | learning rate: 2.535E-05 | global batch size: 256 | lm loss: 3.022160E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.068 | TFLOPs: 30.32 | +7: iteration 19210/ 21553 | consumed samples: 4917760 | consumed tokens: 10071572480 | elapsed time per iteration (s): 0.30 | learning rate: 2.530E-05 | global batch size: 256 | lm loss: 3.030081E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.770 | TFLOPs: 30.34 | +7: iteration 19220/ 21553 | consumed samples: 4920320 | consumed tokens: 10076815360 | elapsed time per iteration (s): 0.30 | learning rate: 2.526E-05 | global batch size: 256 | lm loss: 3.025057E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.635 | TFLOPs: 30.34 | +7: iteration 19230/ 21553 | consumed samples: 4922880 | consumed tokens: 10082058240 | elapsed time per iteration (s): 0.30 | learning rate: 2.521E-05 | global batch size: 256 | lm loss: 
3.013520E+00 | grad norm: 0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.108 | TFLOPs: 30.36 | +7: iteration 19240/ 21553 | consumed samples: 4925440 | consumed tokens: 10087301120 | elapsed time per iteration (s): 0.30 | learning rate: 2.517E-05 | global batch size: 256 | lm loss: 3.019155E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.897 | TFLOPs: 30.35 | +7: iteration 19250/ 21553 | consumed samples: 4928000 | consumed tokens: 10092544000 | elapsed time per iteration (s): 0.30 | learning rate: 2.512E-05 | global batch size: 256 | lm loss: 3.006517E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.840 | TFLOPs: 30.35 | +7: iteration 19260/ 21553 | consumed samples: 4930560 | consumed tokens: 10097786880 | elapsed time per iteration (s): 0.30 | learning rate: 2.508E-05 | global batch size: 256 | lm loss: 3.022603E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.641 | TFLOPs: 30.37 | +7: iteration 19270/ 21553 | consumed samples: 4933120 | consumed tokens: 10103029760 | elapsed time per iteration (s): 0.30 | learning rate: 2.504E-05 | global batch size: 256 | lm loss: 3.017552E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.207 | TFLOPs: 30.36 | +7: iteration 19280/ 21553 | consumed samples: 4935680 | consumed tokens: 10108272640 | elapsed time per iteration (s): 0.30 | learning rate: 2.499E-05 | global batch size: 256 | lm loss: 3.018472E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.613 | TFLOPs: 30.34 | +7: iteration 19290/ 21553 | consumed samples: 4938240 | consumed tokens: 10113515520 | 
elapsed time per iteration (s): 0.30 | learning rate: 2.495E-05 | global batch size: 256 | lm loss: 3.028399E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.761 | TFLOPs: 30.34 | +7: iteration 19300/ 21553 | consumed samples: 4940800 | consumed tokens: 10118758400 | elapsed time per iteration (s): 0.30 | learning rate: 2.491E-05 | global batch size: 256 | lm loss: 3.028530E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.716 | TFLOPs: 30.34 | +7: iteration 19310/ 21553 | consumed samples: 4943360 | consumed tokens: 10124001280 | elapsed time per iteration (s): 0.30 | learning rate: 2.486E-05 | global batch size: 256 | lm loss: 3.010416E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.270 | TFLOPs: 30.33 | +7: iteration 19320/ 21553 | consumed samples: 4945920 | consumed tokens: 10129244160 | elapsed time per iteration (s): 0.30 | learning rate: 2.482E-05 | global batch size: 256 | lm loss: 3.023715E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.642 | TFLOPs: 30.34 | +7: iteration 19330/ 21553 | consumed samples: 4948480 | consumed tokens: 10134487040 | elapsed time per iteration (s): 0.30 | learning rate: 2.478E-05 | global batch size: 256 | lm loss: 3.012003E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.821 | TFLOPs: 30.35 | +7: iteration 19340/ 21553 | consumed samples: 4951040 | consumed tokens: 10139729920 | elapsed time per iteration (s): 0.30 | learning rate: 2.474E-05 | global batch size: 256 | lm loss: 3.009093E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.856 | TFLOPs: 
30.35 | +7: iteration 19350/ 21553 | consumed samples: 4953600 | consumed tokens: 10144972800 | elapsed time per iteration (s): 0.30 | learning rate: 2.469E-05 | global batch size: 256 | lm loss: 3.017187E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 840.202 | TFLOPs: 29.41 | +7: iteration 19360/ 21553 | consumed samples: 4956160 | consumed tokens: 10150215680 | elapsed time per iteration (s): 0.30 | learning rate: 2.465E-05 | global batch size: 256 | lm loss: 3.018295E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.487 | TFLOPs: 30.37 | +7: iteration 19370/ 21553 | consumed samples: 4958720 | consumed tokens: 10155458560 | elapsed time per iteration (s): 0.30 | learning rate: 2.461E-05 | global batch size: 256 | lm loss: 3.019049E+00 | grad norm: 0.457 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.913 | TFLOPs: 30.35 | +7: iteration 19380/ 21553 | consumed samples: 4961280 | consumed tokens: 10160701440 | elapsed time per iteration (s): 0.30 | learning rate: 2.457E-05 | global batch size: 256 | lm loss: 3.022100E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.227 | TFLOPs: 30.32 | +7: iteration 19390/ 21553 | consumed samples: 4963840 | consumed tokens: 10165944320 | elapsed time per iteration (s): 0.30 | learning rate: 2.453E-05 | global batch size: 256 | lm loss: 3.021557E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.053 | TFLOPs: 30.35 | +7: iteration 19400/ 21553 | consumed samples: 4966400 | consumed tokens: 10171187200 | elapsed time per iteration (s): 0.30 | learning rate: 2.448E-05 | global batch size: 256 | lm loss: 2.997353E+00 | grad norm: 0.433 | num zeros: 0.0 | number 
of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.878 | TFLOPs: 30.35 | +7: iteration 19410/ 21553 | consumed samples: 4968960 | consumed tokens: 10176430080 | elapsed time per iteration (s): 0.30 | learning rate: 2.444E-05 | global batch size: 256 | lm loss: 3.020106E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.128 | TFLOPs: 30.36 | +7: iteration 19420/ 21553 | consumed samples: 4971520 | consumed tokens: 10181672960 | elapsed time per iteration (s): 0.30 | learning rate: 2.440E-05 | global batch size: 256 | lm loss: 3.004709E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.777 | TFLOPs: 30.31 | +7: iteration 19430/ 21553 | consumed samples: 4974080 | consumed tokens: 10186915840 | elapsed time per iteration (s): 0.30 | learning rate: 2.436E-05 | global batch size: 256 | lm loss: 3.015437E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.893 | TFLOPs: 30.31 | +7: iteration 19440/ 21553 | consumed samples: 4976640 | consumed tokens: 10192158720 | elapsed time per iteration (s): 0.30 | learning rate: 2.432E-05 | global batch size: 256 | lm loss: 3.004996E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.848 | TFLOPs: 30.31 | +7: iteration 19450/ 21553 | consumed samples: 4979200 | consumed tokens: 10197401600 | elapsed time per iteration (s): 0.30 | learning rate: 2.428E-05 | global batch size: 256 | lm loss: 3.014991E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.943 | TFLOPs: 30.31 | +7: iteration 19460/ 21553 | consumed samples: 4981760 | consumed tokens: 10202644480 | elapsed time per iteration (s): 0.30 | learning rate: 2.424E-05 
| global batch size: 256 | lm loss: 3.008723E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.399 | TFLOPs: 30.30 | +7: iteration 19470/ 21553 | consumed samples: 4984320 | consumed tokens: 10207887360 | elapsed time per iteration (s): 0.30 | learning rate: 2.420E-05 | global batch size: 256 | lm loss: 3.010942E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.756 | TFLOPs: 30.31 | +7: iteration 19480/ 21553 | consumed samples: 4986880 | consumed tokens: 10213130240 | elapsed time per iteration (s): 0.30 | learning rate: 2.416E-05 | global batch size: 256 | lm loss: 3.024520E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.547 | TFLOPs: 30.34 | +7: iteration 19490/ 21553 | consumed samples: 4989440 | consumed tokens: 10218373120 | elapsed time per iteration (s): 0.30 | learning rate: 2.412E-05 | global batch size: 256 | lm loss: 3.025880E+00 | grad norm: 0.414 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.776 | TFLOPs: 30.34 | +7: iteration 19500/ 21553 | consumed samples: 4992000 | consumed tokens: 10223616000 | elapsed time per iteration (s): 0.30 | learning rate: 2.408E-05 | global batch size: 256 | lm loss: 3.020643E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 861.122 | TFLOPs: 30.15 | +7: iteration 19510/ 21553 | consumed samples: 4994560 | consumed tokens: 10228858880 | elapsed time per iteration (s): 0.30 | learning rate: 2.404E-05 | global batch size: 256 | lm loss: 3.024411E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.491 | TFLOPs: 30.33 | +7: iteration 19520/ 21553 | consumed samples: 4997120 | 
consumed tokens: 10234101760 | elapsed time per iteration (s): 0.30 | learning rate: 2.400E-05 | global batch size: 256 | lm loss: 3.018712E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.152 | TFLOPs: 30.32 | +7: iteration 19530/ 21553 | consumed samples: 4999680 | consumed tokens: 10239344640 | elapsed time per iteration (s): 0.30 | learning rate: 2.396E-05 | global batch size: 256 | lm loss: 3.020095E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.367 | TFLOPs: 30.33 | +7: iteration 19540/ 21553 | consumed samples: 5002240 | consumed tokens: 10244587520 | elapsed time per iteration (s): 0.30 | learning rate: 2.392E-05 | global batch size: 256 | lm loss: 3.023018E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.815 | TFLOPs: 30.31 | +7: iteration 19550/ 21553 | consumed samples: 5004800 | consumed tokens: 10249830400 | elapsed time per iteration (s): 0.30 | learning rate: 2.389E-05 | global batch size: 256 | lm loss: 2.999647E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.387 | TFLOPs: 30.33 | +7: iteration 19560/ 21553 | consumed samples: 5007360 | consumed tokens: 10255073280 | elapsed time per iteration (s): 0.30 | learning rate: 2.385E-05 | global batch size: 256 | lm loss: 3.023209E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.208 | TFLOPs: 30.32 | +7: iteration 19570/ 21553 | consumed samples: 5009920 | consumed tokens: 10260316160 | elapsed time per iteration (s): 0.30 | learning rate: 2.381E-05 | global batch size: 256 | lm loss: 3.025949E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples 
per second: 866.315 | TFLOPs: 30.33 | +7: iteration 19580/ 21553 | consumed samples: 5012480 | consumed tokens: 10265559040 | elapsed time per iteration (s): 0.30 | learning rate: 2.377E-05 | global batch size: 256 | lm loss: 3.015632E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.760 | TFLOPs: 30.34 | +7: iteration 19590/ 21553 | consumed samples: 5015040 | consumed tokens: 10270801920 | elapsed time per iteration (s): 0.30 | learning rate: 2.373E-05 | global batch size: 256 | lm loss: 3.013047E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.834 | TFLOPs: 30.31 | +7: iteration 19600/ 21553 | consumed samples: 5017600 | consumed tokens: 10276044800 | elapsed time per iteration (s): 0.30 | learning rate: 2.370E-05 | global batch size: 256 | lm loss: 3.011866E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.688 | TFLOPs: 30.34 | +7: iteration 19610/ 21553 | consumed samples: 5020160 | consumed tokens: 10281287680 | elapsed time per iteration (s): 0.30 | learning rate: 2.366E-05 | global batch size: 256 | lm loss: 3.018356E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.861 | TFLOPs: 30.35 | +7: iteration 19620/ 21553 | consumed samples: 5022720 | consumed tokens: 10286530560 | elapsed time per iteration (s): 0.30 | learning rate: 2.362E-05 | global batch size: 256 | lm loss: 3.007961E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.223 | TFLOPs: 30.32 | +7: iteration 19630/ 21553 | consumed samples: 5025280 | consumed tokens: 10291773440 | elapsed time per iteration (s): 0.30 | learning rate: 2.358E-05 | global batch size: 256 | lm loss: 3.011546E+00 | grad norm: 
0.418 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.179 | TFLOPs: 30.32 | +7: iteration 19640/ 21553 | consumed samples: 5027840 | consumed tokens: 10297016320 | elapsed time per iteration (s): 0.30 | learning rate: 2.355E-05 | global batch size: 256 | lm loss: 3.022585E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.481 | TFLOPs: 30.33 | +7: iteration 19650/ 21553 | consumed samples: 5030400 | consumed tokens: 10302259200 | elapsed time per iteration (s): 0.30 | learning rate: 2.351E-05 | global batch size: 256 | lm loss: 3.016392E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.087 | TFLOPs: 30.32 | +7: iteration 19660/ 21553 | consumed samples: 5032960 | consumed tokens: 10307502080 | elapsed time per iteration (s): 0.30 | learning rate: 2.347E-05 | global batch size: 256 | lm loss: 3.020601E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.070 | TFLOPs: 30.32 | +7: iteration 19670/ 21553 | consumed samples: 5035520 | consumed tokens: 10312744960 | elapsed time per iteration (s): 0.30 | learning rate: 2.344E-05 | global batch size: 256 | lm loss: 3.021796E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.052 | TFLOPs: 30.32 | +7: iteration 19680/ 21553 | consumed samples: 5038080 | consumed tokens: 10317987840 | elapsed time per iteration (s): 0.30 | learning rate: 2.340E-05 | global batch size: 256 | lm loss: 3.019046E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.748 | TFLOPs: 30.38 | +7: iteration 19690/ 21553 | consumed samples: 5040640 | consumed tokens: 10323230720 | elapsed time per iteration (s): 
0.30 | learning rate: 2.336E-05 | global batch size: 256 | lm loss: 3.013671E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.780 | TFLOPs: 30.34 | +7: iteration 19700/ 21553 | consumed samples: 5043200 | consumed tokens: 10328473600 | elapsed time per iteration (s): 0.30 | learning rate: 2.333E-05 | global batch size: 256 | lm loss: 3.021891E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.851 | TFLOPs: 30.35 | +7: iteration 19710/ 21553 | consumed samples: 5045760 | consumed tokens: 10333716480 | elapsed time per iteration (s): 0.30 | learning rate: 2.329E-05 | global batch size: 256 | lm loss: 3.030125E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.410 | TFLOPs: 30.33 | +7: iteration 19720/ 21553 | consumed samples: 5048320 | consumed tokens: 10338959360 | elapsed time per iteration (s): 0.30 | learning rate: 2.326E-05 | global batch size: 256 | lm loss: 2.997686E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.612 | TFLOPs: 30.37 | +7: iteration 19730/ 21553 | consumed samples: 5050880 | consumed tokens: 10344202240 | elapsed time per iteration (s): 0.30 | learning rate: 2.322E-05 | global batch size: 256 | lm loss: 2.998862E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.138 | TFLOPs: 30.36 | +7: iteration 19740/ 21553 | consumed samples: 5053440 | consumed tokens: 10349445120 | elapsed time per iteration (s): 0.30 | learning rate: 2.319E-05 | global batch size: 256 | lm loss: 3.017550E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.586 | TFLOPs: 30.37 | +7: iteration 19750/ 
21553 | consumed samples: 5056000 | consumed tokens: 10354688000 | elapsed time per iteration (s): 0.30 | learning rate: 2.315E-05 | global batch size: 256 | lm loss: 3.011447E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.238 | TFLOPs: 30.36 | +7: iteration 19760/ 21553 | consumed samples: 5058560 | consumed tokens: 10359930880 | elapsed time per iteration (s): 0.30 | learning rate: 2.312E-05 | global batch size: 256 | lm loss: 3.017274E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.173 | TFLOPs: 30.36 | +7: iteration 19770/ 21553 | consumed samples: 5061120 | consumed tokens: 10365173760 | elapsed time per iteration (s): 0.30 | learning rate: 2.308E-05 | global batch size: 256 | lm loss: 3.008841E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.896 | TFLOPs: 30.35 | +7: iteration 19780/ 21553 | consumed samples: 5063680 | consumed tokens: 10370416640 | elapsed time per iteration (s): 0.30 | learning rate: 2.305E-05 | global batch size: 256 | lm loss: 3.027914E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.558 | TFLOPs: 30.34 | +7: iteration 19790/ 21553 | consumed samples: 5066240 | consumed tokens: 10375659520 | elapsed time per iteration (s): 0.30 | learning rate: 2.302E-05 | global batch size: 256 | lm loss: 3.006507E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.148 | TFLOPs: 30.36 | +7: iteration 19800/ 21553 | consumed samples: 5068800 | consumed tokens: 10380902400 | elapsed time per iteration (s): 0.30 | learning rate: 2.298E-05 | global batch size: 256 | lm loss: 3.014407E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | 
number of nan iterations: 0 | samples per second: 866.844 | TFLOPs: 30.35 | +7: iteration 19810/ 21553 | consumed samples: 5071360 | consumed tokens: 10386145280 | elapsed time per iteration (s): 0.30 | learning rate: 2.295E-05 | global batch size: 256 | lm loss: 3.007771E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.943 | TFLOPs: 30.35 | +7: iteration 19820/ 21553 | consumed samples: 5073920 | consumed tokens: 10391388160 | elapsed time per iteration (s): 0.30 | learning rate: 2.291E-05 | global batch size: 256 | lm loss: 3.017991E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.969 | TFLOPs: 30.35 | +7: iteration 19830/ 21553 | consumed samples: 5076480 | consumed tokens: 10396631040 | elapsed time per iteration (s): 0.30 | learning rate: 2.288E-05 | global batch size: 256 | lm loss: 3.007457E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.242 | TFLOPs: 30.36 | +7: iteration 19840/ 21553 | consumed samples: 5079040 | consumed tokens: 10401873920 | elapsed time per iteration (s): 0.30 | learning rate: 2.285E-05 | global batch size: 256 | lm loss: 3.018562E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.192 | TFLOPs: 30.36 | +7: iteration 19850/ 21553 | consumed samples: 5081600 | consumed tokens: 10407116800 | elapsed time per iteration (s): 0.30 | learning rate: 2.281E-05 | global batch size: 256 | lm loss: 3.018280E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.138 | TFLOPs: 30.36 | +7: iteration 19860/ 21553 | consumed samples: 5084160 | consumed tokens: 10412359680 | elapsed time per iteration (s): 0.30 | learning rate: 2.278E-05 | global batch size: 256 | 
lm loss: 3.020835E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.220 | TFLOPs: 30.36 | +7: iteration 19870/ 21553 | consumed samples: 5086720 | consumed tokens: 10417602560 | elapsed time per iteration (s): 0.30 | learning rate: 2.275E-05 | global batch size: 256 | lm loss: 3.005262E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.813 | TFLOPs: 30.34 | +7: iteration 19880/ 21553 | consumed samples: 5089280 | consumed tokens: 10422845440 | elapsed time per iteration (s): 0.30 | learning rate: 2.272E-05 | global batch size: 256 | lm loss: 3.031312E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.594 | TFLOPs: 30.37 | +7: iteration 19890/ 21553 | consumed samples: 5091840 | consumed tokens: 10428088320 | elapsed time per iteration (s): 0.30 | learning rate: 2.268E-05 | global batch size: 256 | lm loss: 3.004462E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.477 | TFLOPs: 30.37 | +7: iteration 19900/ 21553 | consumed samples: 5094400 | consumed tokens: 10433331200 | elapsed time per iteration (s): 0.30 | learning rate: 2.265E-05 | global batch size: 256 | lm loss: 3.024187E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.806 | TFLOPs: 30.34 | +7: iteration 19910/ 21553 | consumed samples: 5096960 | consumed tokens: 10438574080 | elapsed time per iteration (s): 0.30 | learning rate: 2.262E-05 | global batch size: 256 | lm loss: 3.016676E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.876 | TFLOPs: 30.35 | +7: iteration 19920/ 21553 | consumed samples: 5099520 | consumed tokens: 
10443816960 | elapsed time per iteration (s): 0.30 | learning rate: 2.259E-05 | global batch size: 256 | lm loss: 3.003646E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.312 | TFLOPs: 30.33 | +7: iteration 19930/ 21553 | consumed samples: 5102080 | consumed tokens: 10449059840 | elapsed time per iteration (s): 0.30 | learning rate: 2.256E-05 | global batch size: 256 | lm loss: 3.010836E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.441 | TFLOPs: 30.33 | +7: iteration 19940/ 21553 | consumed samples: 5104640 | consumed tokens: 10454302720 | elapsed time per iteration (s): 0.30 | learning rate: 2.253E-05 | global batch size: 256 | lm loss: 3.011205E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.871 | TFLOPs: 30.31 | +7: iteration 19950/ 21553 | consumed samples: 5107200 | consumed tokens: 10459545600 | elapsed time per iteration (s): 0.30 | learning rate: 2.250E-05 | global batch size: 256 | lm loss: 3.014208E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.148 | TFLOPs: 30.32 | +7: iteration 19960/ 21553 | consumed samples: 5109760 | consumed tokens: 10464788480 | elapsed time per iteration (s): 0.30 | learning rate: 2.246E-05 | global batch size: 256 | lm loss: 3.017749E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.182 | TFLOPs: 30.36 | +7: iteration 19970/ 21553 | consumed samples: 5112320 | consumed tokens: 10470031360 | elapsed time per iteration (s): 0.30 | learning rate: 2.243E-05 | global batch size: 256 | lm loss: 3.012889E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 
866.855 | TFLOPs: 30.35 | +7: iteration 19980/ 21553 | consumed samples: 5114880 | consumed tokens: 10475274240 | elapsed time per iteration (s): 0.30 | learning rate: 2.240E-05 | global batch size: 256 | lm loss: 3.016691E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.919 | TFLOPs: 30.35 | +7: iteration 19990/ 21553 | consumed samples: 5117440 | consumed tokens: 10480517120 | elapsed time per iteration (s): 0.30 | learning rate: 2.237E-05 | global batch size: 256 | lm loss: 3.018080E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.817 | TFLOPs: 30.34 | +0: [2023-03-14 00:57:41,756] [INFO] [logging.py:68:log_dist] [Rank 0] step=20000, skipped=0, lr=[2.234259142486404e-05, 2.234259142486404e-05, 2.234259142486404e-05], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +7: iteration 20000/ 21553 | consumed samples: 5120000 | consumed tokens: 10485760000 | elapsed time per iteration (s): 0.30 | learning rate: 2.234E-05 | global batch size: 256 | lm loss: 3.013870E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.846 | TFLOPs: 30.35 | +0: steps: 20000 loss: 3.0473 iter time (s): 0.294 samples/sec: 871.699 +7: ------------------------------------------------------------------------------------------------ +7: validation loss at iteration 20000 | lm loss value: 3.804929E+00 | lm loss PPL: 4.492208E+01 | +7: ------------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 20000 to checkpoints_146m14b100m +0: [2023-03-14 00:57:41,876] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step20000 is begin to save! 
+0: [2023-03-14 00:57:41,879] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_01-model_00-model_states.pt... +0: [2023-03-14 00:57:41,965] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_01-model_00-model_states.pt. +0: [2023-03-14 00:57:41,965] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_03-model_00-model_states.pt... +0: [2023-03-14 00:57:41,980] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_03-model_00-model_states.pt. +0: [2023-03-14 00:57:41,981] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_04-model_00-model_states.pt... +0: [2023-03-14 00:57:41,995] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_04-model_00-model_states.pt. +0: [2023-03-14 00:57:41,996] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_05-model_00-model_states.pt... +0: [2023-03-14 00:57:42,010] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_05-model_00-model_states.pt. +0: [2023-03-14 00:57:42,011] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_06-model_00-model_states.pt... +0: [2023-03-14 00:57:42,026] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_06-model_00-model_states.pt. +0: [2023-03-14 00:57:42,026] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_07-model_00-model_states.pt... +0: [2023-03-14 00:57:42,041] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_07-model_00-model_states.pt. 
+0: [2023-03-14 00:57:42,041] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_08-model_00-model_states.pt... +0: [2023-03-14 00:57:42,056] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_08-model_00-model_states.pt. +0: [2023-03-14 00:57:42,056] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_09-model_00-model_states.pt... +0: [2023-03-14 00:57:42,071] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_09-model_00-model_states.pt. +0: [2023-03-14 00:57:42,071] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_10-model_00-model_states.pt... +0: [2023-03-14 00:57:42,086] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_10-model_00-model_states.pt. +0: [2023-03-14 00:57:42,086] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_11-model_00-model_states.pt... +0: [2023-03-14 00:57:42,101] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_11-model_00-model_states.pt. +0: [2023-03-14 00:57:42,101] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_12-model_00-model_states.pt... +0: [2023-03-14 00:57:42,116] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_12-model_00-model_states.pt. +0: [2023-03-14 00:57:42,116] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_13-model_00-model_states.pt... +0: [2023-03-14 00:57:42,131] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_13-model_00-model_states.pt. 
+0: [2023-03-14 00:57:42,131] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_14-model_00-model_states.pt... +0: [2023-03-14 00:57:42,146] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_14-model_00-model_states.pt. +0: [2023-03-14 00:57:42,146] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_15-model_00-model_states.pt... +0: [2023-03-14 00:57:42,161] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_15-model_00-model_states.pt. +0: [2023-03-14 00:57:42,161] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_16-model_00-model_states.pt... +0: [2023-03-14 00:57:42,176] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_16-model_00-model_states.pt. +0: [2023-03-14 00:57:42,177] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_17-model_00-model_states.pt... +0: [2023-03-14 00:57:42,191] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_17-model_00-model_states.pt. +0: [2023-03-14 00:57:42,192] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/layer_19-model_00-model_states.pt... +0: [2023-03-14 00:57:42,193] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/layer_19-model_00-model_states.pt. +0: [2023-03-14 00:57:42,193] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step20000/mp_rank_00_model_states.pt +0: [2023-03-14 00:57:42,193] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/mp_rank_00_model_states.pt... 
+0: [2023-03-14 00:57:42,195] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/mp_rank_00_model_states.pt. +0: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... 
+4: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-14 00:57:42,212] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +5: [2023-03-14 00:57:42,245] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 00:57:42,245] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-14 00:57:42,245] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2023-03-14 00:57:42,246] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:57:42,246] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +3: [2023-03-14 00:57:42,246] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2023-03-14 00:57:42,246] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:57:42,247] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-14 00:57:42,247] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2023-03-14 00:57:42,247] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:57:42,247] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:57:42,247] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 00:57:42,247] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! 
+2: [2023-03-14 00:57:42,247] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:57:42,247] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:57:42,248] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2023-03-14 00:57:42,248] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:57:42,249] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:57:42,249] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-14 00:57:42,249] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2023-03-14 00:57:42,249] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:57:42,249] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 00:57:42,249] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +5: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:57:42,250] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:57:42,248] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! 
+5: [2023-03-14 00:57:42,249] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:57:42,250] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +5: [2023-03-14 00:57:42,249] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:57:42,249] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 00:57:42,249] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +6: [2023-03-14 00:57:42,250] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:57:42,249] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +5: [2023-03-14 00:57:42,250] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2023-03-14 00:57:42,251] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2023-03-14 00:57:42,251] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 00:57:42,251] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! 
+2: [2023-03-14 00:57:42,248] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +4: [2023-03-14 00:57:42,248] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3: [2023-03-14 00:57:42,249] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2023-03-14 00:57:42,248] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2023-03-14 00:57:42,249] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:57:42,248] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2023-03-14 00:57:42,249] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +2: [2023-03-14 00:57:42,249] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2023-03-14 00:57:42,251] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 00:57:42,249] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +2: [2023-03-14 00:57:42,251] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3: [2023-03-14 00:57:42,250] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2023-03-14 00:57:42,251] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2023-03-14 00:57:42,250] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:57:42,251] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:57:42,250] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +3: [2023-03-14 00:57:42,251] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +4: [2023-03-14 00:57:42,250] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2023-03-14 00:57:42,251] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2023-03-14 00:57:42,250] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:57:42,251] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 00:57:42,250] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3: [2023-03-14 00:57:42,251] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +4: [2023-03-14 00:57:42,250] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2023-03-14 00:57:42,251] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2023-03-14 00:57:42,250] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:57:42,250] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-14 00:57:42,251] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2023-03-14 00:57:42,251] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:57:42,252] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +0: [2023-03-14 00:57:42,252] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:57:42,252] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:57:42,252] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! 
+5: [2023-03-14 00:57:42,251] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:57:42,252] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:57:42,252] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-14 00:57:42,252] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +5: [2023-03-14 00:57:42,251] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 00:57:42,251] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2023-03-14 00:57:42,252] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2023-03-14 00:57:42,252] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2023-03-14 00:57:42,252] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 00:57:42,252] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2023-03-14 00:57:42,252] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:57:42,252] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 00:57:42,252] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2023-03-14 00:57:42,252] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:57:42,252] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-14 00:57:42,252] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2023-03-14 00:57:42,253] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:57:42,253] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-14 00:57:42,253] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2023-03-14 00:57:42,254] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 00:57:42,254] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:57:42,254] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 00:57:42,254] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-14 00:57:42,254] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2023-03-14 00:57:42,254] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +5: [2023-03-14 00:57:42,254] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:57:42,254] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:57:42,254] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-14 00:57:42,254] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-14 00:57:42,254] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +5: [2023-03-14 00:57:42,254] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2023-03-14 00:57:42,254] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 00:57:42,254] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:57:42,255] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +3: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +4: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:57:42,255] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +2: [2023-03-14 00:57:42,255] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +4: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +2: [2023-03-14 00:57:42,255] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +2: [2023-03-14 00:57:42,255] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +2: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2023-03-14 00:57:42,255] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:57:42,255] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! 
+1: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:57:42,255] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +3: [2023-03-14 00:57:42,255] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-14 00:57:42,255] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +1: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +3: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2023-03-14 00:57:42,255] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 00:57:42,255] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! 
+4: [2023-03-14 00:57:42,257] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-03-14 00:57:42,257] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-14 00:57:42,257] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2023-03-14 00:57:42,257] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:57:42,258] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 00:57:42,258] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2023-03-14 00:57:42,258] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:57:42,258] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:57:42,258] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-14 00:57:42,258] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 00:57:42,258] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-14 00:57:42,258] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 00:57:42,258] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 00:57:42,258] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2023-03-14 00:57:42,258] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2023-03-14 00:57:42,258] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +6: [2023-03-14 00:57:42,258] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-14 00:57:42,258] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2023-03-14 00:57:42,259] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:57:42,259] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-14 00:57:42,259] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 00:57:42,259] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 00:57:42,259] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 00:57:42,259] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 00:57:42,259] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2023-03-14 00:57:42,259] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +7: [2023-03-14 00:57:42,259] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +5: [2023-03-14 00:57:42,263] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-14 00:57:42,263] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 00:57:42,263] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2023-03-14 00:57:42,265] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:57:42,265] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:57:42,265] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 00:57:42,265] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 00:57:42,265] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-14 00:57:42,265] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 00:57:42,265] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2023-03-14 00:57:42,265] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2023-03-14 00:57:42,265] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +1: [2023-03-14 00:57:42,266] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-14 00:57:42,266] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-14 00:57:42,266] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! +0: [2023-03-14 00:57:42,268] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step20000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 00:57:42,268] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step20000 is ready now! 
+0: successfully saved checkpoint at iteration 20000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 398.10 +7: iteration 20010/ 21553 | consumed samples: 5122560 | consumed tokens: 10491002880 | elapsed time per iteration (s): 0.35 | learning rate: 2.231E-05 | global batch size: 256 | lm loss: 3.007466E+00 | grad norm: 0.416 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 739.446 | TFLOPs: 25.89 | +7: iteration 20020/ 21553 | consumed samples: 5125120 | consumed tokens: 10496245760 | elapsed time per iteration (s): 0.30 | learning rate: 2.228E-05 | global batch size: 256 | lm loss: 3.012366E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.475 | TFLOPs: 30.37 | +7: iteration 20030/ 21553 | consumed samples: 5127680 | consumed tokens: 10501488640 | elapsed time per iteration (s): 0.30 | learning rate: 2.225E-05 | global batch size: 256 | lm loss: 3.016132E+00 | grad norm: 0.472 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.276 | TFLOPs: 30.36 | +7: iteration 20040/ 21553 | consumed samples: 5130240 | consumed tokens: 10506731520 | elapsed time per iteration (s): 0.30 | learning rate: 2.222E-05 | global batch size: 256 | lm loss: 3.003061E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.507 | TFLOPs: 30.37 | +7: iteration 20050/ 21553 | consumed samples: 5132800 | consumed tokens: 10511974400 | elapsed time per iteration (s): 0.30 | learning rate: 2.219E-05 | global batch size: 256 | lm loss: 3.015039E+00 | grad norm: 0.451 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.014 | TFLOPs: 30.35 | +7: iteration 20060/ 21553 | consumed samples: 5135360 | consumed tokens: 10517217280 | elapsed time per iteration (s): 0.30 | 
learning rate: 2.217E-05 | global batch size: 256 | lm loss: 3.023784E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.459 | TFLOPs: 30.37 | +7: iteration 20070/ 21553 | consumed samples: 5137920 | consumed tokens: 10522460160 | elapsed time per iteration (s): 0.30 | learning rate: 2.214E-05 | global batch size: 256 | lm loss: 3.017613E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.517 | TFLOPs: 30.37 | +7: iteration 20080/ 21553 | consumed samples: 5140480 | consumed tokens: 10527703040 | elapsed time per iteration (s): 0.30 | learning rate: 2.211E-05 | global batch size: 256 | lm loss: 3.020666E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.448 | TFLOPs: 30.33 | +7: iteration 20090/ 21553 | consumed samples: 5143040 | consumed tokens: 10532945920 | elapsed time per iteration (s): 0.30 | learning rate: 2.208E-05 | global batch size: 256 | lm loss: 3.019643E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.270 | TFLOPs: 30.33 | +7: iteration 20100/ 21553 | consumed samples: 5145600 | consumed tokens: 10538188800 | elapsed time per iteration (s): 0.30 | learning rate: 2.205E-05 | global batch size: 256 | lm loss: 3.021385E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.518 | TFLOPs: 30.33 | +7: iteration 20110/ 21553 | consumed samples: 5148160 | consumed tokens: 10543431680 | elapsed time per iteration (s): 0.30 | learning rate: 2.202E-05 | global batch size: 256 | lm loss: 2.996935E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.491 | TFLOPs: 30.33 | +7: iteration 20120/ 21553 | 
consumed samples: 5150720 | consumed tokens: 10548674560 | elapsed time per iteration (s): 0.30 | learning rate: 2.200E-05 | global batch size: 256 | lm loss: 3.026398E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.720 | TFLOPs: 30.34 | +7: iteration 20130/ 21553 | consumed samples: 5153280 | consumed tokens: 10553917440 | elapsed time per iteration (s): 0.30 | learning rate: 2.197E-05 | global batch size: 256 | lm loss: 3.011683E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.834 | TFLOPs: 30.35 | +7: iteration 20140/ 21553 | consumed samples: 5155840 | consumed tokens: 10559160320 | elapsed time per iteration (s): 0.30 | learning rate: 2.194E-05 | global batch size: 256 | lm loss: 3.009089E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.967 | TFLOPs: 30.35 | +7: iteration 20150/ 21553 | consumed samples: 5158400 | consumed tokens: 10564403200 | elapsed time per iteration (s): 0.30 | learning rate: 2.191E-05 | global batch size: 256 | lm loss: 3.009533E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.738 | TFLOPs: 30.34 | +7: iteration 20160/ 21553 | consumed samples: 5160960 | consumed tokens: 10569646080 | elapsed time per iteration (s): 0.30 | learning rate: 2.189E-05 | global batch size: 256 | lm loss: 3.015391E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.052 | TFLOPs: 30.35 | +7: iteration 20170/ 21553 | consumed samples: 5163520 | consumed tokens: 10574888960 | elapsed time per iteration (s): 0.30 | learning rate: 2.186E-05 | global batch size: 256 | lm loss: 3.018644E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of 
nan iterations: 0 | samples per second: 866.828 | TFLOPs: 30.35 | +7: iteration 20180/ 21553 | consumed samples: 5166080 | consumed tokens: 10580131840 | elapsed time per iteration (s): 0.30 | learning rate: 2.183E-05 | global batch size: 256 | lm loss: 3.015648E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.906 | TFLOPs: 30.35 | +7: iteration 20190/ 21553 | consumed samples: 5168640 | consumed tokens: 10585374720 | elapsed time per iteration (s): 0.30 | learning rate: 2.181E-05 | global batch size: 256 | lm loss: 2.999291E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.162 | TFLOPs: 30.32 | +7: iteration 20200/ 21553 | consumed samples: 5171200 | consumed tokens: 10590617600 | elapsed time per iteration (s): 0.30 | learning rate: 2.178E-05 | global batch size: 256 | lm loss: 2.999557E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.278 | TFLOPs: 30.29 | +7: iteration 20210/ 21553 | consumed samples: 5173760 | consumed tokens: 10595860480 | elapsed time per iteration (s): 0.30 | learning rate: 2.175E-05 | global batch size: 256 | lm loss: 3.007108E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.233 | TFLOPs: 30.32 | +7: iteration 20220/ 21553 | consumed samples: 5176320 | consumed tokens: 10601103360 | elapsed time per iteration (s): 0.30 | learning rate: 2.173E-05 | global batch size: 256 | lm loss: 3.022237E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.780 | TFLOPs: 30.34 | +7: iteration 20230/ 21553 | consumed samples: 5178880 | consumed tokens: 10606346240 | elapsed time per iteration (s): 0.30 | learning rate: 2.170E-05 | global batch size: 256 | lm loss: 
3.009333E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.684 | TFLOPs: 30.38 | +7: iteration 20240/ 21553 | consumed samples: 5181440 | consumed tokens: 10611589120 | elapsed time per iteration (s): 0.30 | learning rate: 2.168E-05 | global batch size: 256 | lm loss: 3.022486E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.012 | TFLOPs: 30.35 | +7: iteration 20250/ 21553 | consumed samples: 5184000 | consumed tokens: 10616832000 | elapsed time per iteration (s): 0.30 | learning rate: 2.165E-05 | global batch size: 256 | lm loss: 3.008134E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.589 | TFLOPs: 30.37 | +7: iteration 20260/ 21553 | consumed samples: 5186560 | consumed tokens: 10622074880 | elapsed time per iteration (s): 0.29 | learning rate: 2.163E-05 | global batch size: 256 | lm loss: 3.012622E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.806 | TFLOPs: 30.38 | +7: iteration 20270/ 21553 | consumed samples: 5189120 | consumed tokens: 10627317760 | elapsed time per iteration (s): 0.30 | learning rate: 2.160E-05 | global batch size: 256 | lm loss: 3.020838E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.091 | TFLOPs: 30.35 | +7: iteration 20280/ 21553 | consumed samples: 5191680 | consumed tokens: 10632560640 | elapsed time per iteration (s): 0.30 | learning rate: 2.158E-05 | global batch size: 256 | lm loss: 3.012710E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.005 | TFLOPs: 30.35 | +7: iteration 20290/ 21553 | consumed samples: 5194240 | consumed tokens: 10637803520 | 
elapsed time per iteration (s): 0.30 | learning rate: 2.155E-05 | global batch size: 256 | lm loss: 2.999356E+00 | grad norm: 0.415 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.649 | TFLOPs: 30.37 | +7: iteration 20300/ 21553 | consumed samples: 5196800 | consumed tokens: 10643046400 | elapsed time per iteration (s): 0.30 | learning rate: 2.153E-05 | global batch size: 256 | lm loss: 2.998334E+00 | grad norm: 0.420 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.135 | TFLOPs: 30.36 | +7: iteration 20310/ 21553 | consumed samples: 5199360 | consumed tokens: 10648289280 | elapsed time per iteration (s): 0.30 | learning rate: 2.150E-05 | global batch size: 256 | lm loss: 3.006562E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.276 | TFLOPs: 30.36 | +7: iteration 20320/ 21553 | consumed samples: 5201920 | consumed tokens: 10653532160 | elapsed time per iteration (s): 0.30 | learning rate: 2.148E-05 | global batch size: 256 | lm loss: 3.008564E+00 | grad norm: 0.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.001 | TFLOPs: 30.35 | +7: iteration 20330/ 21553 | consumed samples: 5204480 | consumed tokens: 10658775040 | elapsed time per iteration (s): 0.30 | learning rate: 2.146E-05 | global batch size: 256 | lm loss: 3.016933E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.659 | TFLOPs: 30.34 | +7: iteration 20340/ 21553 | consumed samples: 5207040 | consumed tokens: 10664017920 | elapsed time per iteration (s): 0.30 | learning rate: 2.143E-05 | global batch size: 256 | lm loss: 3.018906E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.177 | TFLOPs: 
30.32 | +7: iteration 20350/ 21553 | consumed samples: 5209600 | consumed tokens: 10669260800 | elapsed time per iteration (s): 0.30 | learning rate: 2.141E-05 | global batch size: 256 | lm loss: 3.034235E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.286 | TFLOPs: 30.36 | +7: iteration 20360/ 21553 | consumed samples: 5212160 | consumed tokens: 10674503680 | elapsed time per iteration (s): 0.30 | learning rate: 2.138E-05 | global batch size: 256 | lm loss: 3.009363E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.153 | TFLOPs: 30.36 | +7: iteration 20370/ 21553 | consumed samples: 5214720 | consumed tokens: 10679746560 | elapsed time per iteration (s): 0.30 | learning rate: 2.136E-05 | global batch size: 256 | lm loss: 3.001814E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.269 | TFLOPs: 30.36 | +7: iteration 20380/ 21553 | consumed samples: 5217280 | consumed tokens: 10684989440 | elapsed time per iteration (s): 0.29 | learning rate: 2.134E-05 | global batch size: 256 | lm loss: 3.016599E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.866 | TFLOPs: 30.38 | +7: iteration 20390/ 21553 | consumed samples: 5219840 | consumed tokens: 10690232320 | elapsed time per iteration (s): 0.30 | learning rate: 2.132E-05 | global batch size: 256 | lm loss: 3.009891E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.577 | TFLOPs: 30.37 | +7: iteration 20400/ 21553 | consumed samples: 5222400 | consumed tokens: 10695475200 | elapsed time per iteration (s): 0.30 | learning rate: 2.129E-05 | global batch size: 256 | lm loss: 3.023386E+00 | grad norm: 0.420 | num zeros: 0.0 | number 
of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.521 | TFLOPs: 30.37 | +7: iteration 20410/ 21553 | consumed samples: 5224960 | consumed tokens: 10700718080 | elapsed time per iteration (s): 0.30 | learning rate: 2.127E-05 | global batch size: 256 | lm loss: 3.009626E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.097 | TFLOPs: 30.35 | +7: iteration 20420/ 21553 | consumed samples: 5227520 | consumed tokens: 10705960960 | elapsed time per iteration (s): 0.30 | learning rate: 2.125E-05 | global batch size: 256 | lm loss: 3.006997E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.772 | TFLOPs: 30.38 | +7: iteration 20430/ 21553 | consumed samples: 5230080 | consumed tokens: 10711203840 | elapsed time per iteration (s): 0.30 | learning rate: 2.123E-05 | global batch size: 256 | lm loss: 3.030221E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.854 | TFLOPs: 30.35 | +7: iteration 20440/ 21553 | consumed samples: 5232640 | consumed tokens: 10716446720 | elapsed time per iteration (s): 0.30 | learning rate: 2.121E-05 | global batch size: 256 | lm loss: 3.014625E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.558 | TFLOPs: 30.37 | +7: iteration 20450/ 21553 | consumed samples: 5235200 | consumed tokens: 10721689600 | elapsed time per iteration (s): 0.30 | learning rate: 2.118E-05 | global batch size: 256 | lm loss: 3.009204E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.372 | TFLOPs: 30.36 | +7: iteration 20460/ 21553 | consumed samples: 5237760 | consumed tokens: 10726932480 | elapsed time per iteration (s): 0.30 | learning rate: 2.116E-05 
| global batch size: 256 | lm loss: 3.006784E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.149 | TFLOPs: 30.36 | +7: iteration 20470/ 21553 | consumed samples: 5240320 | consumed tokens: 10732175360 | elapsed time per iteration (s): 0.29 | learning rate: 2.114E-05 | global batch size: 256 | lm loss: 3.017327E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.896 | TFLOPs: 30.38 | +7: iteration 20480/ 21553 | consumed samples: 5242880 | consumed tokens: 10737418240 | elapsed time per iteration (s): 0.30 | learning rate: 2.112E-05 | global batch size: 256 | lm loss: 3.019083E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.214 | TFLOPs: 30.36 | +7: iteration 20490/ 21553 | consumed samples: 5245440 | consumed tokens: 10742661120 | elapsed time per iteration (s): 0.30 | learning rate: 2.110E-05 | global batch size: 256 | lm loss: 3.017051E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.613 | TFLOPs: 30.23 | +7: iteration 20500/ 21553 | consumed samples: 5248000 | consumed tokens: 10747904000 | elapsed time per iteration (s): 0.30 | learning rate: 2.108E-05 | global batch size: 256 | lm loss: 2.988225E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.616 | TFLOPs: 30.37 | +7: iteration 20510/ 21553 | consumed samples: 5250560 | consumed tokens: 10753146880 | elapsed time per iteration (s): 0.30 | learning rate: 2.106E-05 | global batch size: 256 | lm loss: 3.004503E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.156 | TFLOPs: 30.36 | +7: iteration 20520/ 21553 | consumed samples: 5253120 | 
consumed tokens: 10758389760 | elapsed time per iteration (s): 0.30 | learning rate: 2.104E-05 | global batch size: 256 | lm loss: 3.017270E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.385 | TFLOPs: 30.36 | +7: iteration 20530/ 21553 | consumed samples: 5255680 | consumed tokens: 10763632640 | elapsed time per iteration (s): 0.29 | learning rate: 2.102E-05 | global batch size: 256 | lm loss: 3.007530E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 868.107 | TFLOPs: 30.39 | +7: iteration 20540/ 21553 | consumed samples: 5258240 | consumed tokens: 10768875520 | elapsed time per iteration (s): 0.30 | learning rate: 2.100E-05 | global batch size: 256 | lm loss: 3.018950E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.545 | TFLOPs: 30.37 | +7: iteration 20550/ 21553 | consumed samples: 5260800 | consumed tokens: 10774118400 | elapsed time per iteration (s): 0.30 | learning rate: 2.098E-05 | global batch size: 256 | lm loss: 3.007522E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.689 | TFLOPs: 30.34 | +7: iteration 20560/ 21553 | consumed samples: 5263360 | consumed tokens: 10779361280 | elapsed time per iteration (s): 0.30 | learning rate: 2.096E-05 | global batch size: 256 | lm loss: 3.014172E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.536 | TFLOPs: 30.34 | +7: iteration 20570/ 21553 | consumed samples: 5265920 | consumed tokens: 10784604160 | elapsed time per iteration (s): 0.30 | learning rate: 2.094E-05 | global batch size: 256 | lm loss: 3.024652E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples 
per second: 867.215 | TFLOPs: 30.36 | +7: iteration 20580/ 21553 | consumed samples: 5268480 | consumed tokens: 10789847040 | elapsed time per iteration (s): 0.30 | learning rate: 2.092E-05 | global batch size: 256 | lm loss: 3.015562E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.351 | TFLOPs: 30.36 | +7: iteration 20590/ 21553 | consumed samples: 5271040 | consumed tokens: 10795089920 | elapsed time per iteration (s): 0.30 | learning rate: 2.090E-05 | global batch size: 256 | lm loss: 3.000685E+00 | grad norm: 0.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.250 | TFLOPs: 30.22 | +7: iteration 20600/ 21553 | consumed samples: 5273600 | consumed tokens: 10800332800 | elapsed time per iteration (s): 0.30 | learning rate: 2.088E-05 | global batch size: 256 | lm loss: 3.021264E+00 | grad norm: 0.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.048 | TFLOPs: 30.32 | +7: iteration 20610/ 21553 | consumed samples: 5276160 | consumed tokens: 10805575680 | elapsed time per iteration (s): 0.30 | learning rate: 2.087E-05 | global batch size: 256 | lm loss: 3.003095E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.184 | TFLOPs: 30.36 | +7: iteration 20620/ 21553 | consumed samples: 5278720 | consumed tokens: 10810818560 | elapsed time per iteration (s): 0.30 | learning rate: 2.085E-05 | global batch size: 256 | lm loss: 3.013387E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.079 | TFLOPs: 30.35 | +7: iteration 20630/ 21553 | consumed samples: 5281280 | consumed tokens: 10816061440 | elapsed time per iteration (s): 0.30 | learning rate: 2.083E-05 | global batch size: 256 | lm loss: 3.014617E+00 | grad norm: 
0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 860.398 | TFLOPs: 30.12 | +7: iteration 20640/ 21553 | consumed samples: 5283840 | consumed tokens: 10821304320 | elapsed time per iteration (s): 0.30 | learning rate: 2.081E-05 | global batch size: 256 | lm loss: 2.998007E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.797 | TFLOPs: 30.34 | +7: iteration 20650/ 21553 | consumed samples: 5286400 | consumed tokens: 10826547200 | elapsed time per iteration (s): 0.30 | learning rate: 2.079E-05 | global batch size: 256 | lm loss: 3.010642E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.621 | TFLOPs: 30.37 | +7: iteration 20660/ 21553 | consumed samples: 5288960 | consumed tokens: 10831790080 | elapsed time per iteration (s): 0.30 | learning rate: 2.078E-05 | global batch size: 256 | lm loss: 3.016126E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.468 | TFLOPs: 30.37 | +7: iteration 20670/ 21553 | consumed samples: 5291520 | consumed tokens: 10837032960 | elapsed time per iteration (s): 0.30 | learning rate: 2.076E-05 | global batch size: 256 | lm loss: 3.008098E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.803 | TFLOPs: 30.34 | +7: iteration 20680/ 21553 | consumed samples: 5294080 | consumed tokens: 10842275840 | elapsed time per iteration (s): 0.30 | learning rate: 2.074E-05 | global batch size: 256 | lm loss: 3.000840E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.652 | TFLOPs: 30.37 | +7: iteration 20690/ 21553 | consumed samples: 5296640 | consumed tokens: 10847518720 | elapsed time per iteration (s): 
0.30 | learning rate: 2.073E-05 | global batch size: 256 | lm loss: 3.017607E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.916 | TFLOPs: 30.35 | +7: iteration 20700/ 21553 | consumed samples: 5299200 | consumed tokens: 10852761600 | elapsed time per iteration (s): 0.30 | learning rate: 2.071E-05 | global batch size: 256 | lm loss: 3.008717E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.678 | TFLOPs: 29.99 | +7: iteration 20710/ 21553 | consumed samples: 5301760 | consumed tokens: 10858004480 | elapsed time per iteration (s): 0.30 | learning rate: 2.069E-05 | global batch size: 256 | lm loss: 3.010186E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.819 | TFLOPs: 30.34 | +7: iteration 20720/ 21553 | consumed samples: 5304320 | consumed tokens: 10863247360 | elapsed time per iteration (s): 0.30 | learning rate: 2.068E-05 | global batch size: 256 | lm loss: 3.015557E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.140 | TFLOPs: 30.32 | +7: iteration 20730/ 21553 | consumed samples: 5306880 | consumed tokens: 10868490240 | elapsed time per iteration (s): 0.30 | learning rate: 2.066E-05 | global batch size: 256 | lm loss: 3.004730E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.173 | TFLOPs: 30.32 | +7: iteration 20740/ 21553 | consumed samples: 5309440 | consumed tokens: 10873733120 | elapsed time per iteration (s): 0.30 | learning rate: 2.064E-05 | global batch size: 256 | lm loss: 3.006321E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.336 | TFLOPs: 30.33 | +7: iteration 20750/ 
21553 | consumed samples: 5312000 | consumed tokens: 10878976000 | elapsed time per iteration (s): 0.30 | learning rate: 2.063E-05 | global batch size: 256 | lm loss: 3.011580E+00 | grad norm: 0.453 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.011 | TFLOPs: 30.35 | +7: iteration 20760/ 21553 | consumed samples: 5314560 | consumed tokens: 10884218880 | elapsed time per iteration (s): 0.30 | learning rate: 2.061E-05 | global batch size: 256 | lm loss: 3.010603E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 858.767 | TFLOPs: 30.06 | +7: iteration 20770/ 21553 | consumed samples: 5317120 | consumed tokens: 10889461760 | elapsed time per iteration (s): 0.30 | learning rate: 2.060E-05 | global batch size: 256 | lm loss: 3.016736E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.250 | TFLOPs: 30.36 | +7: iteration 20780/ 21553 | consumed samples: 5319680 | consumed tokens: 10894704640 | elapsed time per iteration (s): 0.30 | learning rate: 2.058E-05 | global batch size: 256 | lm loss: 3.013133E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.218 | TFLOPs: 30.36 | +7: iteration 20790/ 21553 | consumed samples: 5322240 | consumed tokens: 10899947520 | elapsed time per iteration (s): 0.30 | learning rate: 2.057E-05 | global batch size: 256 | lm loss: 3.006845E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.377 | TFLOPs: 30.36 | +7: iteration 20800/ 21553 | consumed samples: 5324800 | consumed tokens: 10905190400 | elapsed time per iteration (s): 0.30 | learning rate: 2.055E-05 | global batch size: 256 | lm loss: 3.018996E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | 
number of nan iterations: 0 | samples per second: 866.538 | TFLOPs: 30.34 | +7: iteration 20810/ 21553 | consumed samples: 5327360 | consumed tokens: 10910433280 | elapsed time per iteration (s): 0.30 | learning rate: 2.054E-05 | global batch size: 256 | lm loss: 3.008874E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.834 | TFLOPs: 30.35 | +7: iteration 20820/ 21553 | consumed samples: 5329920 | consumed tokens: 10915676160 | elapsed time per iteration (s): 0.30 | learning rate: 2.052E-05 | global batch size: 256 | lm loss: 3.009184E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.273 | TFLOPs: 30.36 | +7: iteration 20830/ 21553 | consumed samples: 5332480 | consumed tokens: 10920919040 | elapsed time per iteration (s): 0.30 | learning rate: 2.051E-05 | global batch size: 256 | lm loss: 3.021462E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.036 | TFLOPs: 30.35 | +7: iteration 20840/ 21553 | consumed samples: 5335040 | consumed tokens: 10926161920 | elapsed time per iteration (s): 0.30 | learning rate: 2.050E-05 | global batch size: 256 | lm loss: 3.003294E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.995 | TFLOPs: 30.35 | +7: iteration 20850/ 21553 | consumed samples: 5337600 | consumed tokens: 10931404800 | elapsed time per iteration (s): 0.30 | learning rate: 2.048E-05 | global batch size: 256 | lm loss: 3.014913E+00 | grad norm: 0.427 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.743 | TFLOPs: 30.34 | +7: iteration 20860/ 21553 | consumed samples: 5340160 | consumed tokens: 10936647680 | elapsed time per iteration (s): 0.30 | learning rate: 2.047E-05 | global batch size: 256 | 
lm loss: 3.002833E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.708 | TFLOPs: 30.34 | +7: iteration 20870/ 21553 | consumed samples: 5342720 | consumed tokens: 10941890560 | elapsed time per iteration (s): 0.30 | learning rate: 2.045E-05 | global batch size: 256 | lm loss: 3.009993E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.593 | TFLOPs: 30.34 | +7: iteration 20880/ 21553 | consumed samples: 5345280 | consumed tokens: 10947133440 | elapsed time per iteration (s): 0.30 | learning rate: 2.044E-05 | global batch size: 256 | lm loss: 3.016400E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.253 | TFLOPs: 30.33 | +7: iteration 20890/ 21553 | consumed samples: 5347840 | consumed tokens: 10952376320 | elapsed time per iteration (s): 0.30 | learning rate: 2.043E-05 | global batch size: 256 | lm loss: 3.005807E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 856.816 | TFLOPs: 29.99 | +7: iteration 20900/ 21553 | consumed samples: 5350400 | consumed tokens: 10957619200 | elapsed time per iteration (s): 0.30 | learning rate: 2.042E-05 | global batch size: 256 | lm loss: 3.029853E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.022 | TFLOPs: 30.35 | +7: iteration 20910/ 21553 | consumed samples: 5352960 | consumed tokens: 10962862080 | elapsed time per iteration (s): 0.30 | learning rate: 2.040E-05 | global batch size: 256 | lm loss: 3.012258E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.319 | TFLOPs: 30.36 | +7: iteration 20920/ 21553 | consumed samples: 5355520 | consumed tokens: 
10968104960 | elapsed time per iteration (s): 0.30 | learning rate: 2.039E-05 | global batch size: 256 | lm loss: 3.007774E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.834 | TFLOPs: 30.35 | +7: iteration 20930/ 21553 | consumed samples: 5358080 | consumed tokens: 10973347840 | elapsed time per iteration (s): 0.30 | learning rate: 2.038E-05 | global batch size: 256 | lm loss: 2.999448E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.219 | TFLOPs: 30.36 | +7: iteration 20940/ 21553 | consumed samples: 5360640 | consumed tokens: 10978590720 | elapsed time per iteration (s): 0.30 | learning rate: 2.037E-05 | global batch size: 256 | lm loss: 2.998102E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.965 | TFLOPs: 30.35 | +7: iteration 20950/ 21553 | consumed samples: 5363200 | consumed tokens: 10983833600 | elapsed time per iteration (s): 0.30 | learning rate: 2.035E-05 | global batch size: 256 | lm loss: 2.998139E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.792 | TFLOPs: 30.34 | +7: iteration 20960/ 21553 | consumed samples: 5365760 | consumed tokens: 10989076480 | elapsed time per iteration (s): 0.30 | learning rate: 2.034E-05 | global batch size: 256 | lm loss: 3.010266E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.634 | TFLOPs: 30.34 | +7: iteration 20970/ 21553 | consumed samples: 5368320 | consumed tokens: 10994319360 | elapsed time per iteration (s): 0.30 | learning rate: 2.033E-05 | global batch size: 256 | lm loss: 3.025166E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 
866.543 | TFLOPs: 30.34 | +7: iteration 20980/ 21553 | consumed samples: 5370880 | consumed tokens: 10999562240 | elapsed time per iteration (s): 0.30 | learning rate: 2.032E-05 | global batch size: 256 | lm loss: 3.009762E+00 | grad norm: 0.424 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.261 | TFLOPs: 30.33 | +7: iteration 20990/ 21553 | consumed samples: 5373440 | consumed tokens: 11004805120 | elapsed time per iteration (s): 0.30 | learning rate: 2.031E-05 | global batch size: 256 | lm loss: 3.013545E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.311 | TFLOPs: 30.33 | +7: iteration 21000/ 21553 | consumed samples: 5376000 | consumed tokens: 11010048000 | elapsed time per iteration (s): 0.30 | learning rate: 2.030E-05 | global batch size: 256 | lm loss: 3.004387E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.678 | TFLOPs: 30.34 | +7: ------------------------------------------------------------------------------------------------ +7: validation loss at iteration 21000 | lm loss value: 3.807974E+00 | lm loss PPL: 4.505906E+01 | +7: ------------------------------------------------------------------------------------------------ +0: saving checkpoint at iteration 21000 to checkpoints_146m14b100m +0: [2023-03-14 01:02:37,786] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step21000 is begin to save! +0: [2023-03-14 01:02:37,790] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_01-model_00-model_states.pt... +0: [2023-03-14 01:02:37,876] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_01-model_00-model_states.pt. 
+0: [2023-03-14 01:02:37,876] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_03-model_00-model_states.pt... +0: [2023-03-14 01:02:37,891] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_03-model_00-model_states.pt. +0: [2023-03-14 01:02:37,892] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_04-model_00-model_states.pt... +0: [2023-03-14 01:02:37,906] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_04-model_00-model_states.pt. +0: [2023-03-14 01:02:37,907] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_05-model_00-model_states.pt... +0: [2023-03-14 01:02:37,921] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_05-model_00-model_states.pt. +0: [2023-03-14 01:02:37,922] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_06-model_00-model_states.pt... +0: [2023-03-14 01:02:37,936] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_06-model_00-model_states.pt. +0: [2023-03-14 01:02:37,936] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_07-model_00-model_states.pt... +0: [2023-03-14 01:02:37,951] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_07-model_00-model_states.pt. +0: [2023-03-14 01:02:37,951] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_08-model_00-model_states.pt... +0: [2023-03-14 01:02:37,966] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_08-model_00-model_states.pt. 
+0: [2023-03-14 01:02:37,966] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_09-model_00-model_states.pt... +0: [2023-03-14 01:02:37,981] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_09-model_00-model_states.pt. +0: [2023-03-14 01:02:37,982] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_10-model_00-model_states.pt... +0: [2023-03-14 01:02:37,996] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_10-model_00-model_states.pt. +0: [2023-03-14 01:02:37,997] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_11-model_00-model_states.pt... +0: [2023-03-14 01:02:38,011] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_11-model_00-model_states.pt. +0: [2023-03-14 01:02:38,012] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_12-model_00-model_states.pt... +0: [2023-03-14 01:02:38,027] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_12-model_00-model_states.pt. +0: [2023-03-14 01:02:38,027] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_13-model_00-model_states.pt... +0: [2023-03-14 01:02:38,042] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_13-model_00-model_states.pt. +0: [2023-03-14 01:02:38,042] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_14-model_00-model_states.pt... +0: [2023-03-14 01:02:38,057] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_14-model_00-model_states.pt. 
+0: [2023-03-14 01:02:38,057] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_15-model_00-model_states.pt... +0: [2023-03-14 01:02:38,072] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_15-model_00-model_states.pt. +0: [2023-03-14 01:02:38,072] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_16-model_00-model_states.pt... +0: [2023-03-14 01:02:38,087] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_16-model_00-model_states.pt. +0: [2023-03-14 01:02:38,087] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_17-model_00-model_states.pt... +0: [2023-03-14 01:02:38,102] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_17-model_00-model_states.pt. +0: [2023-03-14 01:02:38,102] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/layer_19-model_00-model_states.pt... +0: [2023-03-14 01:02:38,103] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/layer_19-model_00-model_states.pt. +0: [2023-03-14 01:02:38,104] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step21000/mp_rank_00_model_states.pt +0: [2023-03-14 01:02:38,104] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/mp_rank_00_model_states.pt... +0: [2023-03-14 01:02:38,106] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/mp_rank_00_model_states.pt. +0: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... 
+0: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... 
+4: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... 
+3: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:02:38,124] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:02:38,157] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:02:38,157] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 01:02:38,157] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! 
+7: [2023-03-14 01:02:38,157] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:02:38,157] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-14 01:02:38,157] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2023-03-14 01:02:38,158] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:02:38,158] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:02:38,158] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 01:02:38,158] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2023-03-14 01:02:38,158] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-03-14 01:02:38,159] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-14 01:02:38,159] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2023-03-14 01:02:38,159] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 01:02:38,159] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-14 01:02:38,159] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2023-03-14 01:02:38,160] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:02:38,160] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 01:02:38,160] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2023-03-14 01:02:38,160] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:02:38,160] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-14 01:02:38,160] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2023-03-14 01:02:38,155] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:02:38,155] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +4: [2023-03-14 01:02:38,155] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2023-03-14 01:02:38,160] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 01:02:38,160] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-14 01:02:38,160] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2023-03-14 01:02:38,160] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:02:38,160] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +4: [2023-03-14 01:02:38,160] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2023-03-14 01:02:38,161] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:02:38,161] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 01:02:38,161] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2023-03-14 01:02:38,161] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-03-14 01:02:38,161] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +3: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 01:02:38,162] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +2: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +2: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2023-03-14 01:02:38,162] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3: [2023-03-14 01:02:38,162] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +2: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2023-03-14 01:02:38,162] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +3: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 01:02:38,162] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +7: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2023-03-14 01:02:38,162] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +7: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-03-14 01:02:38,162] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +2: [2023-03-14 01:02:38,162] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2023-03-14 01:02:38,163] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-14 01:02:38,163] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 01:02:38,163] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-14 01:02:38,163] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-14 01:02:38,163] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2023-03-14 01:02:38,163] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2023-03-14 01:02:38,163] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:02:38,163] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 01:02:38,163] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2023-03-14 01:02:38,165] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:02:38,165] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-14 01:02:38,165] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2023-03-14 01:02:38,165] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 01:02:38,165] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +2: [2023-03-14 01:02:38,166] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2023-03-14 01:02:38,165] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:02:38,165] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:02:38,165] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:02:38,165] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:02:38,165] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:02:38,166] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +5: [2023-03-14 01:02:38,166] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 01:02:38,166] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 01:02:38,166] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! 
+5: [2023-03-14 01:02:38,166] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 01:02:38,166] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 01:02:38,166] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2023-03-14 01:02:38,166] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2023-03-14 01:02:38,166] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2023-03-14 01:02:38,166] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2023-03-14 01:02:38,166] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:02:38,166] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-14 01:02:38,166] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2023-03-14 01:02:38,166] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-03-14 01:02:38,166] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +3: [2023-03-14 01:02:38,166] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! 
+7: [2023-03-14 01:02:38,166] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:02:38,166] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +2: [2023-03-14 01:02:38,166] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-03-14 01:02:38,167] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +2: [2023-03-14 01:02:38,167] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2023-03-14 01:02:38,167] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2023-03-14 01:02:38,167] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:02:38,167] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-14 01:02:38,167] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2023-03-14 01:02:38,169] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:02:38,169] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 01:02:38,169] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-14 01:02:38,169] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 01:02:38,169] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +7: [2023-03-14 01:02:38,169] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 01:02:38,170] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2023-03-14 01:02:38,170] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 01:02:38,170] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! 
+6: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-03-14 01:02:38,170] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:02:38,170] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-14 01:02:38,170] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +5: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:02:38,174] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 01:02:38,174] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-14 01:02:38,174] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +1: [2023-03-14 01:02:38,174] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:02:38,174] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! 
+1: [2023-03-14 01:02:38,174] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2023-03-14 01:02:38,174] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 01:02:38,174] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +2: [2023-03-14 01:02:38,169] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +3: [2023-03-14 01:02:38,169] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 01:02:38,170] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +4: [2023-03-14 01:02:38,170] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +3: [2023-03-14 01:02:38,169] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +2: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2023-03-14 01:02:38,170] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3: [2023-03-14 01:02:38,169] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2023-03-14 01:02:38,171] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-14 01:02:38,171] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:02:38,170] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! 
+3: [2023-03-14 01:02:38,171] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +3: [2023-03-14 01:02:38,171] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +4: [2023-03-14 01:02:38,171] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +3: [2023-03-14 01:02:38,171] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +3: [2023-03-14 01:02:38,171] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2023-03-14 01:02:38,171] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +4: [2023-03-14 01:02:38,171] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +4: [2023-03-14 01:02:38,171] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:02:38,171] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:02:38,171] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +4: [2023-03-14 01:02:38,171] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! 
+4: [2023-03-14 01:02:38,171] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +4: [2023-03-14 01:02:38,171] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +0: [2023-03-14 01:02:38,177] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 01:02:38,177] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2023-03-14 01:02:38,177] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-14 01:02:38,177] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-14 01:02:38,177] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-14 01:02:38,177] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 01:02:38,178] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 01:02:38,178] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 01:02:38,178] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-14 01:02:38,178] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 01:02:38,178] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2023-03-14 01:02:38,178] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2023-03-14 01:02:38,178] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +6: [2023-03-14 01:02:38,178] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! +1: [2023-03-14 01:02:38,179] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:02:38,179] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21000/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 01:02:38,179] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21000 is ready now! 
+0: successfully saved checkpoint at iteration 21000 to checkpoints_146m14b100m +7: time (ms) | save-checkpoint: 408.25 +7: iteration 21010/ 21553 | consumed samples: 5378560 | consumed tokens: 11015290880 | elapsed time per iteration (s): 0.35 | learning rate: 2.029E-05 | global batch size: 256 | lm loss: 2.998457E+00 | grad norm: 0.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 735.799 | TFLOPs: 25.76 | +7: iteration 21020/ 21553 | consumed samples: 5381120 | consumed tokens: 11020533760 | elapsed time per iteration (s): 0.30 | learning rate: 2.028E-05 | global batch size: 256 | lm loss: 2.996708E+00 | grad norm: 0.448 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.533 | TFLOPs: 30.33 | +7: iteration 21030/ 21553 | consumed samples: 5383680 | consumed tokens: 11025776640 | elapsed time per iteration (s): 0.30 | learning rate: 2.027E-05 | global batch size: 256 | lm loss: 3.007038E+00 | grad norm: 0.421 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.459 | TFLOPs: 30.33 | +7: iteration 21040/ 21553 | consumed samples: 5386240 | consumed tokens: 11031019520 | elapsed time per iteration (s): 0.30 | learning rate: 2.026E-05 | global batch size: 256 | lm loss: 3.009785E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.302 | TFLOPs: 30.33 | +7: iteration 21050/ 21553 | consumed samples: 5388800 | consumed tokens: 11036262400 | elapsed time per iteration (s): 0.30 | learning rate: 2.025E-05 | global batch size: 256 | lm loss: 3.001926E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.382 | TFLOPs: 30.33 | +7: iteration 21060/ 21553 | consumed samples: 5391360 | consumed tokens: 11041505280 | elapsed time per iteration (s): 0.30 | 
learning rate: 2.024E-05 | global batch size: 256 | lm loss: 2.999885E+00 | grad norm: 0.429 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.996 | TFLOPs: 30.35 | +7: iteration 21070/ 21553 | consumed samples: 5393920 | consumed tokens: 11046748160 | elapsed time per iteration (s): 0.30 | learning rate: 2.023E-05 | global batch size: 256 | lm loss: 3.003318E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.053 | TFLOPs: 30.35 | +7: iteration 21080/ 21553 | consumed samples: 5396480 | consumed tokens: 11051991040 | elapsed time per iteration (s): 0.30 | learning rate: 2.022E-05 | global batch size: 256 | lm loss: 3.007563E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.677 | TFLOPs: 30.34 | +7: iteration 21090/ 21553 | consumed samples: 5399040 | consumed tokens: 11057233920 | elapsed time per iteration (s): 0.30 | learning rate: 2.021E-05 | global batch size: 256 | lm loss: 2.993984E+00 | grad norm: 0.447 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.480 | TFLOPs: 30.33 | +7: iteration 21100/ 21553 | consumed samples: 5401600 | consumed tokens: 11062476800 | elapsed time per iteration (s): 0.30 | learning rate: 2.020E-05 | global batch size: 256 | lm loss: 3.007224E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.629 | TFLOPs: 30.34 | +7: iteration 21110/ 21553 | consumed samples: 5404160 | consumed tokens: 11067719680 | elapsed time per iteration (s): 0.30 | learning rate: 2.019E-05 | global batch size: 256 | lm loss: 3.007052E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 865.882 | TFLOPs: 30.31 | +7: iteration 21120/ 21553 | 
consumed samples: 5406720 | consumed tokens: 11072962560 | elapsed time per iteration (s): 0.30 | learning rate: 2.018E-05 | global batch size: 256 | lm loss: 3.005302E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.717 | TFLOPs: 30.34 | +7: iteration 21130/ 21553 | consumed samples: 5409280 | consumed tokens: 11078205440 | elapsed time per iteration (s): 0.30 | learning rate: 2.017E-05 | global batch size: 256 | lm loss: 3.012249E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.010 | TFLOPs: 30.35 | +7: iteration 21140/ 21553 | consumed samples: 5411840 | consumed tokens: 11083448320 | elapsed time per iteration (s): 0.30 | learning rate: 2.017E-05 | global batch size: 256 | lm loss: 3.006438E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.534 | TFLOPs: 30.33 | +7: iteration 21150/ 21553 | consumed samples: 5414400 | consumed tokens: 11088691200 | elapsed time per iteration (s): 0.30 | learning rate: 2.016E-05 | global batch size: 256 | lm loss: 3.007432E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.776 | TFLOPs: 30.34 | +7: iteration 21160/ 21553 | consumed samples: 5416960 | consumed tokens: 11093934080 | elapsed time per iteration (s): 0.30 | learning rate: 2.015E-05 | global batch size: 256 | lm loss: 3.022230E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.688 | TFLOPs: 30.34 | +7: iteration 21170/ 21553 | consumed samples: 5419520 | consumed tokens: 11099176960 | elapsed time per iteration (s): 0.30 | learning rate: 2.014E-05 | global batch size: 256 | lm loss: 3.014988E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of 
nan iterations: 0 | samples per second: 866.351 | TFLOPs: 30.33 | +7: iteration 21180/ 21553 | consumed samples: 5422080 | consumed tokens: 11104419840 | elapsed time per iteration (s): 0.30 | learning rate: 2.014E-05 | global batch size: 256 | lm loss: 3.013185E+00 | grad norm: 0.434 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.983 | TFLOPs: 30.35 | +7: iteration 21190/ 21553 | consumed samples: 5424640 | consumed tokens: 11109662720 | elapsed time per iteration (s): 0.30 | learning rate: 2.013E-05 | global batch size: 256 | lm loss: 3.016694E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.691 | TFLOPs: 30.34 | +7: iteration 21200/ 21553 | consumed samples: 5427200 | consumed tokens: 11114905600 | elapsed time per iteration (s): 0.30 | learning rate: 2.012E-05 | global batch size: 256 | lm loss: 2.998103E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.822 | TFLOPs: 30.35 | +7: iteration 21210/ 21553 | consumed samples: 5429760 | consumed tokens: 11120148480 | elapsed time per iteration (s): 0.30 | learning rate: 2.011E-05 | global batch size: 256 | lm loss: 3.014931E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.607 | TFLOPs: 30.34 | +7: iteration 21220/ 21553 | consumed samples: 5432320 | consumed tokens: 11125391360 | elapsed time per iteration (s): 0.30 | learning rate: 2.011E-05 | global batch size: 256 | lm loss: 2.999519E+00 | grad norm: 0.436 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.771 | TFLOPs: 30.34 | +7: iteration 21230/ 21553 | consumed samples: 5434880 | consumed tokens: 11130634240 | elapsed time per iteration (s): 0.30 | learning rate: 2.010E-05 | global batch size: 256 | lm loss: 
3.004737E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.941 | TFLOPs: 30.35 | +7: iteration 21240/ 21553 | consumed samples: 5437440 | consumed tokens: 11135877120 | elapsed time per iteration (s): 0.30 | learning rate: 2.010E-05 | global batch size: 256 | lm loss: 2.999450E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.031 | TFLOPs: 30.35 | +7: iteration 21250/ 21553 | consumed samples: 5440000 | consumed tokens: 11141120000 | elapsed time per iteration (s): 0.30 | learning rate: 2.009E-05 | global batch size: 256 | lm loss: 3.006474E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 863.705 | TFLOPs: 30.24 | +7: iteration 21260/ 21553 | consumed samples: 5442560 | consumed tokens: 11146362880 | elapsed time per iteration (s): 0.30 | learning rate: 2.008E-05 | global batch size: 256 | lm loss: 3.006610E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.397 | TFLOPs: 30.33 | +7: iteration 21270/ 21553 | consumed samples: 5445120 | consumed tokens: 11151605760 | elapsed time per iteration (s): 0.30 | learning rate: 2.008E-05 | global batch size: 256 | lm loss: 3.005634E+00 | grad norm: 0.439 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.060 | TFLOPs: 30.32 | +7: iteration 21280/ 21553 | consumed samples: 5447680 | consumed tokens: 11156848640 | elapsed time per iteration (s): 0.30 | learning rate: 2.007E-05 | global batch size: 256 | lm loss: 3.002399E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.658 | TFLOPs: 30.34 | +7: iteration 21290/ 21553 | consumed samples: 5450240 | consumed tokens: 11162091520 | 
elapsed time per iteration (s): 0.30 | learning rate: 2.007E-05 | global batch size: 256 | lm loss: 3.017178E+00 | grad norm: 0.442 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.287 | TFLOPs: 30.33 | +7: iteration 21300/ 21553 | consumed samples: 5452800 | consumed tokens: 11167334400 | elapsed time per iteration (s): 0.30 | learning rate: 2.006E-05 | global batch size: 256 | lm loss: 3.006394E+00 | grad norm: 0.443 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.301 | TFLOPs: 30.33 | +7: iteration 21310/ 21553 | consumed samples: 5455360 | consumed tokens: 11172577280 | elapsed time per iteration (s): 0.30 | learning rate: 2.006E-05 | global batch size: 256 | lm loss: 3.004332E+00 | grad norm: 0.433 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.449 | TFLOPs: 30.33 | +7: iteration 21320/ 21553 | consumed samples: 5457920 | consumed tokens: 11177820160 | elapsed time per iteration (s): 0.30 | learning rate: 2.005E-05 | global batch size: 256 | lm loss: 3.000860E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.889 | TFLOPs: 30.35 | +7: iteration 21330/ 21553 | consumed samples: 5460480 | consumed tokens: 11183063040 | elapsed time per iteration (s): 0.30 | learning rate: 2.005E-05 | global batch size: 256 | lm loss: 3.001424E+00 | grad norm: 0.431 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.584 | TFLOPs: 30.34 | +7: iteration 21340/ 21553 | consumed samples: 5463040 | consumed tokens: 11188305920 | elapsed time per iteration (s): 0.30 | learning rate: 2.004E-05 | global batch size: 256 | lm loss: 3.009232E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.592 | TFLOPs: 
30.34 | +7: iteration 21350/ 21553 | consumed samples: 5465600 | consumed tokens: 11193548800 | elapsed time per iteration (s): 0.30 | learning rate: 2.004E-05 | global batch size: 256 | lm loss: 3.006245E+00 | grad norm: 0.459 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.139 | TFLOPs: 30.32 | +7: iteration 21360/ 21553 | consumed samples: 5468160 | consumed tokens: 11198791680 | elapsed time per iteration (s): 0.30 | learning rate: 2.004E-05 | global batch size: 256 | lm loss: 3.007981E+00 | grad norm: 0.437 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.502 | TFLOPs: 30.33 | +7: iteration 21370/ 21553 | consumed samples: 5470720 | consumed tokens: 11204034560 | elapsed time per iteration (s): 0.30 | learning rate: 2.003E-05 | global batch size: 256 | lm loss: 3.009841E+00 | grad norm: 0.452 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.212 | TFLOPs: 30.32 | +7: iteration 21380/ 21553 | consumed samples: 5473280 | consumed tokens: 11209277440 | elapsed time per iteration (s): 0.30 | learning rate: 2.003E-05 | global batch size: 256 | lm loss: 3.005027E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.349 | TFLOPs: 30.33 | +7: iteration 21390/ 21553 | consumed samples: 5475840 | consumed tokens: 11214520320 | elapsed time per iteration (s): 0.30 | learning rate: 2.003E-05 | global batch size: 256 | lm loss: 3.027798E+00 | grad norm: 0.438 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.469 | TFLOPs: 30.33 | +7: iteration 21400/ 21553 | consumed samples: 5478400 | consumed tokens: 11219763200 | elapsed time per iteration (s): 0.30 | learning rate: 2.002E-05 | global batch size: 256 | lm loss: 2.998537E+00 | grad norm: 0.431 | num zeros: 0.0 | number 
of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.397 | TFLOPs: 30.33 | +7: iteration 21410/ 21553 | consumed samples: 5480960 | consumed tokens: 11225006080 | elapsed time per iteration (s): 0.30 | learning rate: 2.002E-05 | global batch size: 256 | lm loss: 3.019333E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.598 | TFLOPs: 30.34 | +7: iteration 21420/ 21553 | consumed samples: 5483520 | consumed tokens: 11230248960 | elapsed time per iteration (s): 0.30 | learning rate: 2.002E-05 | global batch size: 256 | lm loss: 3.004638E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.576 | TFLOPs: 30.34 | +7: iteration 21430/ 21553 | consumed samples: 5486080 | consumed tokens: 11235491840 | elapsed time per iteration (s): 0.30 | learning rate: 2.001E-05 | global batch size: 256 | lm loss: 3.017058E+00 | grad norm: 0.425 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.952 | TFLOPs: 30.35 | +7: iteration 21440/ 21553 | consumed samples: 5488640 | consumed tokens: 11240734720 | elapsed time per iteration (s): 0.30 | learning rate: 2.001E-05 | global batch size: 256 | lm loss: 3.010599E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.569 | TFLOPs: 30.34 | +7: iteration 21450/ 21553 | consumed samples: 5491200 | consumed tokens: 11245977600 | elapsed time per iteration (s): 0.30 | learning rate: 2.001E-05 | global batch size: 256 | lm loss: 2.998621E+00 | grad norm: 0.435 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.791 | TFLOPs: 30.34 | +7: iteration 21460/ 21553 | consumed samples: 5493760 | consumed tokens: 11251220480 | elapsed time per iteration (s): 0.30 | learning rate: 2.001E-05 
| global batch size: 256 | lm loss: 3.011620E+00 | grad norm: 0.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.883 | TFLOPs: 30.35 | +7: iteration 21470/ 21553 | consumed samples: 5496320 | consumed tokens: 11256463360 | elapsed time per iteration (s): 0.30 | learning rate: 2.001E-05 | global batch size: 256 | lm loss: 2.994566E+00 | grad norm: 0.432 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.328 | TFLOPs: 30.36 | +7: iteration 21480/ 21553 | consumed samples: 5498880 | consumed tokens: 11261706240 | elapsed time per iteration (s): 0.30 | learning rate: 2.001E-05 | global batch size: 256 | lm loss: 3.003805E+00 | grad norm: 0.456 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.338 | TFLOPs: 30.36 | +7: iteration 21490/ 21553 | consumed samples: 5501440 | consumed tokens: 11266949120 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 3.014404E+00 | grad norm: 0.430 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.506 | TFLOPs: 30.33 | +7: iteration 21500/ 21553 | consumed samples: 5504000 | consumed tokens: 11272192000 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 3.012506E+00 | grad norm: 0.446 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.586 | TFLOPs: 30.34 | +7: iteration 21510/ 21553 | consumed samples: 5506560 | consumed tokens: 11277434880 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 2.996416E+00 | grad norm: 0.441 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 867.052 | TFLOPs: 30.35 | +7: iteration 21520/ 21553 | consumed samples: 5509120 | 
consumed tokens: 11282677760 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 3.003463E+00 | grad norm: 0.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.813 | TFLOPs: 30.34 | +7: iteration 21530/ 21553 | consumed samples: 5511680 | consumed tokens: 11287920640 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 3.013071E+00 | grad norm: 0.450 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.298 | TFLOPs: 30.33 | +7: iteration 21540/ 21553 | consumed samples: 5514240 | consumed tokens: 11293163520 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 3.001895E+00 | grad norm: 0.455 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.470 | TFLOPs: 30.33 | +7: iteration 21550/ 21553 | consumed samples: 5516800 | consumed tokens: 11298406400 | elapsed time per iteration (s): 0.30 | learning rate: 2.000E-05 | global batch size: 256 | lm loss: 2.991616E+00 | grad norm: 0.428 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 | samples per second: 866.146 | TFLOPs: 30.32 | +0: [after training is done] datetime: 2023-03-14 01:05:21 +0: saving checkpoint at iteration 21553 to checkpoints_146m14b100m +7: ----------------------------------------------------------------------------------------------------------------- +7: validation loss at the end of training for val data | lm loss value: 3.810600E+00 | lm loss PPL: 4.517752E+01 | +7: ----------------------------------------------------------------------------------------------------------------- +0: [2023-03-14 01:05:21,680] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step21553 is begin to save! 
+0: [2023-03-14 01:05:21,683] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-03-14 01:05:21,769] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 01:05:21,769] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 01:05:21,785] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-03-14 01:05:21,785] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 01:05:21,799] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 01:05:21,800] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-03-14 01:05:21,814] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 01:05:21,815] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 01:05:21,829] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 01:05:21,829] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 01:05:21,844] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+0: [2023-03-14 01:05:21,844] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 01:05:21,859] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-03-14 01:05:21,859] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 01:05:21,873] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 01:05:21,874] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 01:05:21,888] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 01:05:21,888] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 01:05:21,903] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-03-14 01:05:21,903] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 01:05:21,918] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 01:05:21,918] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 01:05:21,933] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+0: [2023-03-14 01:05:21,933] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 01:05:21,947] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 01:05:21,948] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 01:05:21,962] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 01:05:21,963] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 01:05:21,977] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 01:05:21,977] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 01:05:21,992] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-03-14 01:05:21,992] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 01:05:21,993] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 01:05:21,994] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt +0: [2023-03-14 01:05:21,994] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+0: [2023-03-14 01:05:21,996] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... 
+1: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... 
+7: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +2: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +3: [2023-03-14 01:05:22,013] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +0: [2023-03-14 01:05:22,044] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 01:05:22,047] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:05:22,047] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +0: [2023-03-14 01:05:22,047] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-14 01:05:22,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt +6: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:05:22,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +0: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2023-03-14 01:05:22,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +2: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 01:05:22,046] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:05:22,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3: [2023-03-14 01:05:22,047] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:05:22,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:05:22,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +0: [2023-03-14 01:05:22,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +0: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 01:05:22,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +0: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:05:22,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt +7: [2023-03-14 01:05:22,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt +7: [2023-03-14 01:05:22,051] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2023-03-14 01:05:22,051] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2023-03-14 01:05:22,051] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:05:22,051] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +1: [2023-03-14 01:05:22,051] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2023-03-14 01:05:22,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 01:05:22,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt +6: [2023-03-14 01:05:22,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2023-03-14 01:05:22,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-03-14 01:05:22,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 01:05:22,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt +6: [2023-03-14 01:05:22,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt +6: [2023-03-14 01:05:22,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2023-03-14 01:05:22,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2023-03-14 01:05:22,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:05:22,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt +7: [2023-03-14 01:05:22,052] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! 
+2: [2023-03-14 01:05:22,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +4: [2023-03-14 01:05:22,046] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt +3: [2023-03-14 01:05:22,047] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt +2: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2023-03-14 01:05:22,046] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2023-03-14 01:05:22,047] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +3: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 01:05:22,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt +4: [2023-03-14 01:05:22,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt +3: [2023-03-14 01:05:22,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt +2: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2023-03-14 01:05:22,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-03-14 01:05:22,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-14 01:05:22,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:05:22,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +3: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +2: [2023-03-14 01:05:22,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt +2: [2023-03-14 01:05:22,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt +2: [2023-03-14 01:05:22,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +4: [2023-03-14 01:05:22,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt +3: [2023-03-14 01:05:22,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt +3: [2023-03-14 01:05:22,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt +2: [2023-03-14 01:05:22,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2023-03-14 01:05:22,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2023-03-14 01:05:22,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! 
+2: [2023-03-14 01:05:22,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2023-03-14 01:05:22,052] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-03-14 01:05:22,052] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt +4: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +3: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +2: [2023-03-14 01:05:22,053] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2023-03-14 01:05:22,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt +3: [2023-03-14 01:05:22,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt +2: [2023-03-14 01:05:22,054] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:05:22,051] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! 
+2: [2023-03-14 01:05:22,054] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt +4: [2023-03-14 01:05:22,051] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt +3: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +2: [2023-03-14 01:05:22,054] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2023-03-14 01:05:22,051] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2023-03-14 01:05:22,050] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt +4: [2023-03-14 01:05:22,054] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +3: [2023-03-14 01:05:22,050] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2023-03-14 01:05:22,054] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt +3: [2023-03-14 01:05:22,054] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:05:22,054] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2023-03-14 01:05:22,054] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 01:05:22,054] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt +3: [2023-03-14 01:05:22,054] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt +0: [2023-03-14 01:05:22,054] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-14 01:05:22,054] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +3: [2023-03-14 01:05:22,054] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2023-03-14 01:05:22,054] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +0: [2023-03-14 01:05:22,054] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +0: [2023-03-14 01:05:22,054] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2023-03-14 01:05:22,054] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +3: [2023-03-14 01:05:22,054] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2023-03-14 01:05:22,055] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 01:05:22,055] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt +6: [2023-03-14 01:05:22,055] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2023-03-14 01:05:22,055] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-14 01:05:22,055] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt +6: [2023-03-14 01:05:22,055] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-03-14 01:05:22,055] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt +6: [2023-03-14 01:05:22,055] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2023-03-14 01:05:22,055] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2023-03-14 01:05:22,055] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:05:22,055] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt +7: [2023-03-14 01:05:22,055] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:05:22,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt +5: [2023-03-14 01:05:22,048] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt +5: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2023-03-14 01:05:22,048] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2023-03-14 01:05:22,049] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:05:22,049] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt +7: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:05:22,049] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2023-03-14 01:05:22,051] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:05:22,051] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt +5: [2023-03-14 01:05:22,051] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! 
+5: [2023-03-14 01:05:22,053] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:05:22,056] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt +5: [2023-03-14 01:05:22,053] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt +5: [2023-03-14 01:05:22,053] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2023-03-14 01:05:22,053] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2023-03-14 01:05:22,053] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt +5: [2023-03-14 01:05:22,053] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2023-03-14 01:05:22,055] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-14 01:05:22,055] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt +5: [2023-03-14 01:05:22,055] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +5: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 01:05:22,056] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt +5: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-14 01:05:22,056] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt +7: [2023-03-14 01:05:22,056] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt +7: [2023-03-14 01:05:22,056] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt +7: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +7: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +6: [2023-03-14 01:05:22,060] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. 
+6: [2023-03-14 01:05:22,060] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt +6: [2023-03-14 01:05:22,060] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2023-03-14 01:05:22,060] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:05:22,060] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:05:22,060] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +1: [2023-03-14 01:05:22,060] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:05:22,060] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +1: [2023-03-14 01:05:22,060] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2023-03-14 01:05:22,060] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2023-03-14 01:05:22,060] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +1: [2023-03-14 01:05:22,060] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2023-03-14 01:05:22,060] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 01:05:22,060] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +1: [2023-03-14 01:05:22,060] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2023-03-14 01:05:22,061] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:05:22,061] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +1: [2023-03-14 01:05:22,061] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +1: [2023-03-14 01:05:22,062] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-03-14 01:05:22,062] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +1: [2023-03-14 01:05:22,062] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +2: [2023-03-14 01:05:22,060] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 01:05:22,060] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +2: [2023-03-14 01:05:22,060] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2023-03-14 01:05:22,056] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt +4: [2023-03-14 01:05:22,056] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt +4: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +4: [2023-03-14 01:05:22,056] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: [2023-03-14 01:05:22,074] [INFO] [engine.py:3213:_save_zero_checkpoint] bf16_zero checkpoint saved checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +0: [2023-03-14 01:05:22,075] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step21553 is ready now! +0: successfully saved checkpoint at iteration 21553 to checkpoints_146m14b100m +END 3301357: Tue 14 Mar 2023 01:05:30 AM EET diff --git a/146m14b100m/logs/3302895.err b/146m14b100m/logs/3302895.err new file mode 100644 index 0000000000000000000000000000000000000000..bc30e45565ee4f4eba9bf557d213096b506d77db --- /dev/null +++ b/146m14b100m/logs/3302895.err @@ -0,0 +1,1118 @@ +4: 2023-03-14 09:40:47.338315: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+4: 2023-03-14 09:40:47.338334: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-03-14 09:40:47.338341: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: 2023-03-14 09:40:47.338645: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-03-14 09:40:47.338666: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-03-14 09:40:47.338493: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-03-14 09:40:47.338501: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-03-14 09:40:47.338549: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: 2023-03-14 09:40:47.338490: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-14 09:40:47.338499: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-14 09:40:47.338506: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: 2023-03-14 09:40:47.338651: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-03-14 09:40:47.338670: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+0: 2023-03-14 09:40:47.338692: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-03-14 09:40:47.338708: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-14 09:40:47.338495: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-03-14 09:40:47.338718: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-03-14 09:40:47.338727: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+2: 2023-03-14 09:40:47.338732: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: 2023-03-14 09:40:47.338357: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-03-14 09:40:47.338364: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-03-14 09:40:47.338372: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: 2023-03-14 09:40:47.338724: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-03-14 09:40:47.338733: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-03-14 09:40:47.338570: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-14 09:40:47.338536: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-03-14 09:40:47.339239: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-03-14 09:40:47.339241: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-03-14 09:40:47.338615: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-03-14 09:40:47.338621: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-14 09:40:47.338555: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-14 09:40:47.338550: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-03-14 09:40:47.338746: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-03-14 09:40:47.338712: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+2: 2023-03-14 09:40:47.338749: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: 2023-03-14 09:40:47.338403: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-03-14 09:40:47.338637: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-03-14 09:40:47.338588: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-03-14 09:40:47.338462: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-03-14 09:40:47.338718: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-14 09:40:47.339424: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-14 09:40:47.339440: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-14 09:40:47.339450: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-14 09:40:47.339494: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+5: 2023-03-14 09:40:47.339487: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-14 09:40:47.339508: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: 2023-03-14 09:40:47.339575: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-03-14 09:40:47.339584: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-03-14 09:40:47.339574: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-14 09:40:47.339511: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-03-14 09:40:47.339518: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-03-14 09:40:47.339620: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-03-14 09:40:47.339631: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-03-14 09:40:47.339625: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-03-14 09:40:47.339628: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+3: 2023-03-14 09:40:47.339619: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-14 09:40:47.339865: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-14 09:40:47.339860: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-14 09:40:47.339909: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-14 09:40:47.339917: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+1: 2023-03-14 09:40:47.339925: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-14 09:40:47.339927: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-14 09:40:47.339925: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-03-14 09:40:47.339944: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-03-14 09:41:00.796407: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-03-14 09:41:00.796442: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-03-14 09:41:00.796389: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-03-14 09:41:00.796437: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-14 09:41:00.796519: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:00.796409: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-03-14 09:41:00.796503: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-03-14 09:41:00.796468: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-03-14 09:41:00.796452: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:00.796379: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-03-14 09:41:00.796457: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-14 09:41:00.796538: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:00.796510: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:00.796448: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:00.796444: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-03-14 09:41:00.796483: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:00.796423: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-03-14 09:41:00.797051: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-14 09:41:00.797101: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-14 09:41:00.797171: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:00.796462: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-03-14 09:41:00.796473: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-03-14 09:41:00.796507: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-03-14 09:41:00.796492: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-03-14 09:41:00.796553: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-03-14 09:41:00.796409: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 
'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 2023-03-14 09:41:00.796541: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:00.797062: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:00.797229: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+3: 2023-03-14 09:41:00.796473: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-03-14 09:41:00.796494: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-03-14 09:41:00.796516: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-03-14 09:41:00.796499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-03-14 09:41:00.796565: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-03-14 09:41:00.796457: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 
'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 2023-03-14 09:41:00.796593: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:00.797075: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-14 09:41:00.797184: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:00.796498: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-03-14 09:41:00.796510: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-03-14 09:41:00.796535: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-03-14 09:41:00.796501: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-03-14 09:41:00.796570: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-03-14 09:41:00.796464: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 
'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 2023-03-14 09:41:00.796589: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:00.796482: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-03-14 09:41:00.796540: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-03-14 09:41:00.796531: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-03-14 09:41:00.796498: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-03-14 09:41:00.796570: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 
'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-03-14 09:41:00.796448: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 2023-03-14 09:41:00.796601: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-14 09:41:00.797124: I 
tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-03-14 09:41:00.797121: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:00.797260: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+3: 2023-03-14 09:41:00.796495: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-03-14 09:41:00.796576: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-03-14 09:41:00.796531: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-03-14 09:41:00.796552: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-03-14 09:41:00.797131: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+5: 2023-03-14 09:41:00.796583: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-03-14 09:41:00.796474: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 2023-03-14 09:41:00.796611: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:00.797088: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:00.797224: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-14 09:41:00.796514: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-14 09:41:00.797200: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-14 09:41:00.797208: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:00.797280: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-03-14 09:41:00.796486: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-03-14 09:41:00.796542: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-03-14 09:41:00.796547: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-03-14 09:41:00.797593: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-14 09:41:00.797144: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-03-14 09:41:00.797142: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-03-14 09:41:00.797157: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-03-14 09:41:00.797172: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-14 09:41:00.796640: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-03-14 09:41:00.797546: I 
tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-03-14 09:41:00.796658: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:00.797109: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-03-14 09:41:00.797113: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-03-14 09:41:00.797123: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+3: 2023-03-14 09:41:00.797128: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:00.797254: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-03-14 09:41:00.797615: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:00.797575: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-14 09:41:00.797578: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-14 09:41:00.797586: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:00.797650: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-03-14 09:41:00.796593: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-03-14 09:41:00.797627: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-14 09:41:00.797218: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-14 09:41:00.797224: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+5: 2023-03-14 09:41:00.797221: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-03-14 09:41:00.797236: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-14 09:41:00.797597: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-03-14 09:41:00.796619: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-03-14 09:41:00.797674: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-03-14 09:41:00.797683: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:00.797647: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-14 09:41:00.797609: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-14 09:41:00.797615: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:00.797295: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-03-14 09:41:00.797299: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-03-14 09:41:00.797300: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-03-14 09:41:00.797318: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-03-14 09:41:00.797698: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-03-14 09:41:00.796601: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-03-14 09:41:00.797652: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-03-14 09:41:00.797637: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-03-14 09:41:00.797325: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-03-14 09:41:00.797710: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:00.797667: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-03-14 09:41:00.797673: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-03-14 09:41:00.797673: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-03-14 09:41:00.797720: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-03-14 09:41:00.797723: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-03-14 09:41:00.796613: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-03-14 09:41:00.797726: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:00.797283: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-03-14 09:41:00.796618: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:00.796605: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:00.797313: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-03-14 09:41:00.797325: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-03-14 09:41:00.797330: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-03-14 09:41:00.797340: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-03-14 09:41:00.797350: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-03-14 09:41:33.269770: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 2023-03-14 09:41:33.269994: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:33.269797: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-14 09:41:33.269987: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-03-14 09:41:33.270159: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.270045: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-03-14 09:41:33.270163: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.270028: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:33.269811: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-14 09:41:33.270022: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-03-14 09:41:33.270192: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.270073: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-03-14 09:41:33.270197: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.270042: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:33.269849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-14 09:41:33.270034: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-03-14 09:41:33.270206: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.270105: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-03-14 09:41:33.270217: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.270055: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:33.269855: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-14 09:41:33.270058: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-03-14 09:41:33.270224: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.270121: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-03-14 09:41:33.270234: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.270071: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:33.269865: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-14 09:41:33.270067: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-03-14 09:41:33.270238: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.270145: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-03-14 09:41:33.270250: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.270090: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:33.269892: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-14 09:41:33.270074: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-03-14 09:41:33.270244: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.270152: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-03-14 09:41:33.270251: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.270099: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:33.269898: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-03-14 09:41:33.270080: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-03-14 09:41:33.270250: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.270163: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-03-14 09:41:33.270266: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.270298: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-14 09:41:33.270106: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-03-14 09:41:33.270264: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.270172: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-03-14 09:41:33.270333: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:33.272813: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:33.272814: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.272967: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 2023-03-14 09:41:33.272819: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-03-14 09:41:33.272952: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:33.272822: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.272971: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.272955: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:33.272825: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.272973: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.272955: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:33.272827: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-03-14 09:41:33.273084: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.272969: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:33.272838: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+6: 2023-03-14 09:41:33.273125: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-03-14 09:41:33.272957: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:33.272828: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.272977: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:33.272840: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-03-14 09:41:33.272841: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:33.273099: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-14 09:41:33.272960: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:33.272846: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-03-14 09:41:33.272847: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+1: 2023-03-14 09:41:33.272848: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-14 09:41:33.273093: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.272973: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 2023-03-14 09:41:33.272851: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.272962: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:33.272903: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or 
directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-03-14 09:41:33.273092: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-03-14 09:41:33.273127: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: 
libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.272971: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+3: 2023-03-14 09:41:33.272972: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-03-14 09:41:33.272922: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.272961: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.272983: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-14 09:41:33.272985: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-14 09:41:33.273098: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-03-14 09:41:33.273131: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.272979: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.272959: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-03-14 09:41:33.272989: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-14 09:41:33.272992: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: 2023-03-14 09:41:33.273098: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-03-14 09:41:33.273130: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.272976: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-14 09:41:33.272976: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-14 09:41:33.272993: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-03-14 09:41:33.272980: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-14 09:41:33.272979: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-14 09:41:33.272981: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-14 09:41:33.272993: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-14 09:41:33.272995: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-03-14 09:41:33.272999: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-14 09:41:33.273101: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-03-14 09:41:33.273133: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-03-14 09:41:33.272984: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-03-14 09:41:33.272984: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:33.273106: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-03-14 09:41:33.273133: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-14 09:41:33.273140: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-03-14 09:41:33.273143: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: 2023-03-14 09:41:33.273102: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-03-14 09:41:33.273147: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-03-14 09:41:33.273148: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-03-14 09:41:33.273149: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-03-14 09:41:33.273118: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-14 09:41:33.273119: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-03-14 09:41:33.273152: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-14 09:41:33.273123: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-14 09:41:33.273123: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-14 09:41:33.273124: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+6: 2023-03-14 09:41:33.273184: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-03-14 09:41:33.273128: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-03-14 09:41:33.273127: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-14 09:41:33.273190: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-03-14 09:41:33.273198: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-03-14 09:41:33.273204: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-03-14 09:41:33.273651: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-14 09:41:33.273653: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-14 09:41:33.273656: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-14 09:41:33.273654: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-14 09:41:33.273658: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-14 09:41:33.273660: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-14 09:41:33.273668: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-03-14 09:41:33.273667: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: 2023-03-14 09:41:33.273665: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-14 09:41:33.273673: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-03-14 09:41:33.273675: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-03-14 09:41:33.273675: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-03-14 09:41:33.273675: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-03-14 09:41:33.273684: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-03-14 09:41:33.273700: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-03-14 09:41:33.273714: W 
tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-14 09:41:33.298660: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.298700: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.298692: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.298725: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.298740: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.298762: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.298767: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.298780: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.299821: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.299851: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.299866: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.299894: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.299915: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.299935: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.299948: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.299953: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.301488: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.301488: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.301494: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.301498: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.301504: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-14 09:41:33.301504: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+2: 2023-03-14 09:41:33.301499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.301499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.301500: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.301504: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-03-14 09:41:33.301514: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-14 09:41:33.301519: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-14 09:41:33.301521: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-14 09:41:33.301523: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-14 09:41:33.301525: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-03-14 09:41:33.301525: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-14 09:41:33.302309: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.302312: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.302314: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.302318: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.302318: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.302322: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.302339: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-14 09:41:33.302339: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-14 09:41:33.302339: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-14 09:41:33.302345: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-14 09:41:33.302346: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-14 09:41:33.302346: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-14 09:41:33.302396: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.302400: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-03-14 09:41:33.302418: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-03-14 09:41:33.302420: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: Successfully preprocessed all matching files. 
+0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_upper_triang_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_upper_triang_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module fused_mix_prec_layer_norm_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module fused_mix_prec_layer_norm_cuda... +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +7: Successfully preprocessed all matching files. 
+7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +2: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +5: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +3: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +7: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: +0: +0: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +1: +1: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: +4: +4: +4: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+5: +5: +5: +5: +5: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: +6: +6: +6: +6: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +2: Building extension module utils... +2: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +2: Loading extension module utils... +0: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +2: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +3: Loading extension module utils... +2: Loading extension module utils... +0: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... 
+2: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +5: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +5: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +5: Loading extension module utils... +4: Loading extension module utils... +5: Loading extension module utils... +4: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+7: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +7: +7: Loading extension module utils...Loading extension module utils... +7: +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +7: +7: +7: Loading extension module utils...Loading extension module utils... +7: +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +3: +3: Loading extension module utils...Loading extension module utils... +3: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: +3: Loading extension module utils...Loading extension module utils... +3: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +6: +6: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +6: +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... 
+5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +5: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Loading extension module utils...Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +5: +5: +5: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +5: +5: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +4: +4: Loading extension module utils...Loading extension module utils... 
+4: +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +4: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +4: +4: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: +2: Loading extension module utils...Loading extension module utils... 
+1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +2: +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...No modifications detected for re-loaded extension module utils, skipping build step... +0: +0: Loading extension module utils... 
+0: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +0: +0: +0: Loading extension module utils...Loading extension module utils...Loading extension module utils... +0: +0: +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings +0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/146m14b100m/logs/3302895.out b/146m14b100m/logs/3302895.out new file mode 100644 index 0000000000000000000000000000000000000000..89ba92ff727ee7bdc3f7163d4d3826a3a029ceae --- /dev/null +++ b/146m14b100m/logs/3302895.out @@ -0,0 +1,5664 @@ +Model parameters: d_model 768 ffw_size 3072 kv_size 64 n_heads 12 n_layers 15 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 1 --pipeline-model-parallel-size 1 --num-layers 15 --hidden-size 768 --num-attention-heads 12 --kv-channels 64 --ffn-hidden-size 3072 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 4 
--global-batch-size 256 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --clip-grad 1.0 --kill-switch-path kill-switch-146m14b100mval --bf16 --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 --lr-warmup-samples 0 --clip-grad 1.0 --weight-decay 1e-1 --no-load-optim --reset-progress --override-lr-scheduler --log-interval 10 --save-interval 1000 --eval-interval 1 --eval-iters 100 --eval-only true --tensorboard-dir tensorboard_146m14b100mval --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save checkpoints_146m14b100m --load checkpoints_146m14b100m --train-weighted-split-paths-path train14b.txt --valid-weighted-split-paths-path val.txt --data-impl mmap --deepspeed --deepspeed_config ds_configs/3302895.json --zero-stage 0 +START 3302895: Tue 14 Mar 2023 09:39:14 AM EET +0: +0: +0: ======================= ROCm System Management Interface ======================= +0: ================================= Concise Info ================================= +0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +0: 0 46.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 1 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 2 35.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 4 44.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 6 44.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: ================================================================================ +0: ============================= End of ROCm SMI Log ============================== +7: +7: +7: ======================= ROCm System Management Interface ======================= +7: ================================= Concise Info ================================= +7: GPU Temp AvgPwr SCLK MCLK Fan 
Perf PwrCap VRAM% GPU% +7: 0 50.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 2 37.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 3 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 4 41.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 6 44.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: ================================================================================ +7: ============================= End of ROCm SMI Log ============================== +6: +6: +6: ======================= ROCm System Management Interface ======================= +6: ================================= Concise Info ================================= +6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +6: 0 44.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 2 43.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 4 41.0c 168.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 6 41.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 7 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: ================================================================================ +6: ============================= End of ROCm SMI Log ============================== +5: +5: +5: ======================= ROCm System Management Interface ======================= +5: ================================= Concise Info ================================= +5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +5: 0 47.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 1 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 2 37.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 3 52.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 4 44.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 6 39.0c 88.0W 
800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: ================================================================================ +5: ============================= End of ROCm SMI Log ============================== +4: +4: +4: ======================= ROCm System Management Interface ======================= +4: ================================= Concise Info ================================= +4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +4: 0 46.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 1 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 2 44.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 4 44.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 6 45.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: ================================================================================ +4: ============================= End of ROCm SMI Log ============================== +2: +2: +2: ======================= ROCm System Management Interface ======================= +2: ================================= Concise Info ================================= +2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +2: 0 44.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 2 40.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 4 37.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 6 44.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: ================================================================================ +2: ============================= End of ROCm SMI Log ============================== +1: +1: +1: ======================= ROCm System Management Interface ======================= +1: 
================================= Concise Info ================================= +1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +1: 0 44.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 2 38.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 3 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 4 43.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 6 39.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: ================================================================================ +1: ============================= End of ROCm SMI Log ============================== +3: +3: +3: ======================= ROCm System Management Interface ======================= +3: ================================= Concise Info ================================= +3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +3: 0 50.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 2 42.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 4 43.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 5 39.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 6 39.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: ================================================================================ +3: ============================= End of ROCm SMI Log ============================== +5: Launching on nid005905 (5/8), master nid005900 port 9999, GPUs 8, CUDA: True +2: Launching on nid005902 (2/8), master nid005900 port 9999, GPUs 8, CUDA: True +4: Launching on nid005904 (4/8), master nid005900 port 9999, GPUs 8, CUDA: True +3: Launching on nid005903 (3/8), master nid005900 port 9999, GPUs 8, CUDA: True +0: Launching on nid005900 (0/8), master nid005900 port 9999, GPUs 8, CUDA: True +1: Launching on nid005901 (1/8), master 
nid005900 port 9999, GPUs 8, CUDA: True +7: Launching on nid005907 (7/8), master nid005900 port 9999, GPUs 8, CUDA: True +6: Launching on nid005906 (6/8), master nid005900 port 9999, GPUs 8, CUDA: True +0: using world size: 64, data-parallel-size: 64, tensor-model-parallel size: 1, pipeline-model-parallel size: 1 +0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. +0: using torch.bfloat16 for parameters ... +0: ------------------------ arguments ------------------------ +0: abort_on_unmet_fused_kernel_constraints ......... False +0: accumulate_allreduce_grads_in_fp32 .............. True +0: adam_beta1 ...................................... 0.9 +0: adam_beta2 ...................................... 0.999 +0: adam_eps ........................................ 1e-08 +0: adlr_autoresume ................................. False +0: adlr_autoresume_interval ........................ 1000 +0: apply_query_key_layer_scaling ................... True +0: apply_residual_connection_post_layernorm ........ False +0: attention_dropout ............................... 0.1 +0: attention_softmax_in_fp32 ....................... False +0: bert_binary_head ................................ True +0: bert_load ....................................... None +0: bf16 ............................................ True +0: bias_dropout_fusion ............................. True +0: bias_gelu_fusion ................................ True +0: biencoder_projection_dim ........................ 0 +0: biencoder_shared_query_context_model ............ False +0: block_data_path ................................. None +0: checkpoint_activations .......................... False +0: checkpoint_in_cpu ............................... False +0: checkpoint_num_layers ........................... 1 +0: clip_grad ....................................... 1.0 +0: codecarbon_dir .................................. None +0: consumed_train_samples .......................... 
0 +0: consumed_train_tokens ........................... 0 +0: consumed_valid_samples .......................... 0 +0: contigious_checkpointing ........................ False +0: cpu_optimizer ................................... False +0: cpu_torch_adam .................................. False +0: curriculum_learning ............................. False +0: data_impl ....................................... mmap +0: data_parallel_size .............................. 64 +0: data_path ....................................... None +0: dataloader_type ................................. single +0: DDP_impl ........................................ local +0: decoder_seq_length .............................. None +0: deepscale ....................................... False +0: deepscale_config ................................ None +0: deepspeed ....................................... True +0: deepspeed_activation_checkpointing .............. False +0: deepspeed_config ................................ ds_configs/3302895.json +0: deepspeed_mpi ................................... False +0: distribute_checkpointed_activations ............. False +0: distributed_backend ............................. nccl +0: embed_layernorm ................................. False +0: embedding_path .................................. None +0: encoder_seq_length .............................. 2048 +0: eod_mask_loss ................................... False +0: eval_interval ................................... 1 +0: eval_iters ...................................... 100 +0: eval_only ....................................... True +0: evidence_data_path .............................. None +0: exit_duration_in_mins ........................... None +0: exit_interval ................................... None +0: ffn_hidden_size ................................. 3072 +0: finetune ........................................ False +0: fp16 ............................................ 
False +0: fp16_lm_cross_entropy ........................... False +0: fp32_residual_connection ........................ False +0: gigaflos_no_embeds .............................. 0 +0: global_batch_size ............................... 256 +0: glu_activation .................................. None +0: hidden_dropout .................................. 0.1 +0: hidden_size ..................................... 768 +0: hysteresis ...................................... 2 +0: ict_head_size ................................... None +0: ict_load ........................................ None +0: img_dim ......................................... 224 +0: indexer_batch_size .............................. 128 +0: indexer_log_interval ............................ 1000 +0: inference ....................................... False +0: init_method_std ................................. 0.02 +0: init_method_xavier_uniform ...................... False +0: initial_loss_scale .............................. 4294967296 +0: kill_switch_path ................................ kill-switch-146m14b100mval +0: kv_channels ..................................... 64 +0: layer_norm_fusion ............................... True +0: layernorm_epsilon ............................... 1e-05 +0: lazy_mpu_init ................................... None +0: load ............................................ checkpoints_146m14b100m +0: local_rank ...................................... None +0: log_batch_size_to_tensorboard ................... True +0: log_interval .................................... 10 +0: log_learning_rate_to_tensorboard ................ True +0: log_level ....................................... None +0: log_level_replica ............................... None +0: log_loss_scale_to_tensorboard ................... True +0: log_num_zeros_in_grad ........................... False +0: log_params_norm ................................. False +0: log_path ........................................ 
None +0: log_timers_to_tensorboard ....................... True +0: log_validation_ppl_to_tensorboard ............... True +0: loss_on_targets_only ............................ False +0: loss_scale ...................................... None +0: loss_scale_window ............................... 1000 +0: lr .............................................. 0.0002 +0: lr_decay_iters .................................. None +0: lr_decay_samples ................................ 1 +0: lr_decay_style .................................. cosine +0: lr_decay_tokens ................................. None +0: lr_warmup_fraction .............................. None +0: lr_warmup_iters ................................. 0 +0: lr_warmup_samples ............................... 0 +0: make_vocab_size_divisible_by .................... 128 +0: mask_prob ....................................... 0.15 +0: masked_softmax_fusion ........................... True +0: max_position_embeddings ......................... 2048 +0: mean_noise_span_length .......................... None +0: memory_centric_tiled_linear ..................... False +0: merge_file ...................................... gpt2/merges.txt +0: micro_batch_size ................................ 4 +0: min_loss_scale .................................. 1.0 +0: min_lr .......................................... 2e-05 +0: mmap_warmup ..................................... False +0: no_load_optim ................................... True +0: no_load_rng ..................................... None +0: no_save_optim ................................... None +0: no_save_rng ..................................... None +0: noise_density ................................... None +0: num_attention_heads ............................. 12 +0: num_channels .................................... 3 +0: num_classes ..................................... 1000 +0: num_layers ...................................... 
15 +0: num_layers_per_virtual_pipeline_stage ........... None +0: num_workers ..................................... 2 +0: onnx_safe ....................................... None +0: openai_gelu ..................................... False +0: optimizer ....................................... adam +0: optimizer_fusion ................................ True +0: override_lr_scheduler ........................... True +0: pad_vocab_size_to ............................... None +0: params_dtype .................................... torch.bfloat16 +0: partition_activations ........................... False +0: patch_dim ....................................... 16 +0: pipeline_model_parallel_size .................... 1 +0: position_embedding_type ......................... PositionEmbeddingType.absolute +0: pp_partition_method ............................. None +0: profile_backward ................................ False +0: query_in_block_prob ............................. 0.1 +0: rampup_batch_size ............................... None +0: rank ............................................ 0 +0: remote_device ................................... none +0: reset_attention_mask ............................ False +0: reset_position_ids .............................. False +0: reset_progress .................................. True +0: retriever_report_topk_accuracies ................ [] +0: retriever_score_scaling ......................... False +0: retriever_seq_length ............................ 256 +0: reweight_loss_based_on_position_frequency ....... False +0: sample_rate ..................................... 1.0 +0: save ............................................ checkpoints_146m14b100m +0: save_interval ................................... 1000 +0: scatter_gather_tensors_in_pipeline .............. True +0: scattered_embeddings ............................ False +0: seed ............................................ 1234 +0: seq_length ...................................... 
2048 +0: sgd_momentum .................................... 0.9 +0: short_seq_prob .................................. 0.1 +0: skip_train_iteration_range ...................... None +0: split ........................................... None +0: split_transformers .............................. False +0: sync_tp_duplicated_parameters ................... False +0: synchronize_each_layer .......................... False +0: tensor_model_parallel_size ...................... 1 +0: tensorboard_dir ................................. tensorboard_146m14b100mval +0: tensorboard_log_interval ........................ 1 +0: tensorboard_queue_size .......................... 5 +0: test_weighted_split_paths ....................... None +0: test_weighted_split_paths_path .................. None +0: tile_factor ..................................... 1 +0: titles_data_path ................................ None +0: tokenizer_name_or_path .......................... None +0: tokenizer_type .................................. GPT2BPETokenizer +0: train_iters ..................................... None +0: train_samples ................................... 1 +0: train_tokens .................................... None +0: train_weighted_split_names ...................... ['train'] +0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document']] +0: train_weighted_split_paths_path ................. None +0: train_weighted_split_splits ..................... [['0:1']] +0: train_weighted_split_weights .................... [['1.0']] +0: universal_checkpoint ............................ False +0: use_bnb_optimizer ............................... False +0: use_checkpoint_lr_scheduler ..................... False +0: use_contiguous_buffers_in_ddp ................... True +0: use_cpu_initialization .......................... None +0: use_one_sent_docs ............................... 
False +0: use_pin_memory .................................. False +0: valid_num_workers ............................... 2 +0: valid_weighted_split_names ...................... ['validation'] +0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] +0: valid_weighted_split_paths_path ................. None +0: valid_weighted_split_splits ..................... [['0:1']] +0: valid_weighted_split_weights .................... [['1.0']] +0: virtual_pipeline_model_parallel_size ............ None +0: vocab_extra_ids ................................. 0 +0: vocab_file ...................................... gpt2/vocab.json +0: weight_decay .................................... 0.1 +0: world_size ...................................... 64 +0: zero_allgather_bucket_size ...................... 0.0 +0: zero_contigious_gradients ....................... False +0: zero_reduce_bucket_size ......................... 0.0 +0: zero_reduce_scatter ............................. False +0: zero_stage ...................................... 0 +0: -------------------- end of arguments --------------------- +0: setting number of micro-batches to constant 1 +0: > building GPT2BPETokenizer tokenizer ... +0: > padded vocab (size: 50257) with 47 dummy tokens (new size: 50304) +0: DeepSpeed general environment info: +0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] +0: torch version .................... 1.13.0+rocm5.2 +0: torch cuda version ............... None +0: torch hip version ................ 5.2.21151-afdc89f8 +0: nvcc version ..................... None +0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] +0: deepspeed info ................... 0.7.5, unknown, unknown +0: deepspeed wheel compiled w. 
...... torch 1.13, hip 5.1 +0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** +0: > initializing torch distributed ... +0: [2023-03-14 09:43:05,526] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +7: > setting tensorboard ... +0: > initializing tensor model parallel with size 1 +0: > initializing pipeline model parallel with size 1 +0: > setting random seeds to 1234 ... +0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 +0: > compiling dataset index builder ... +0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: make: Nothing to be done for 'default'. +0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: >>> done with dataset index builder. Compilation time: 0.100 seconds +0: > compiling and loading fused kernels ... 
+0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 87 +0: ninja: no work to do. +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 63 +0: ninja: no work to do. +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 67 +0: [1/1] c++ layer_norm_cuda.o layer_norm_hip_kernel.cuda.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o fused_mix_prec_layer_norm_cuda.so +0: >>> done with compiling and loading fused kernels. Compilation time: 21.419 seconds +0: time to initialize megatron (seconds): 33.858 +0: [after megatron is initialized] datetime: 2023-03-14 09:43:29 +0: building GPT model ... 
+0: [2023-03-14 09:43:29,994] [INFO] [utils.py:827:see_memory_usage] Before Building Model +0: [2023-03-14 09:43:29,994] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB +0: [2023-03-14 09:43:29,995] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 34.17 GB, percent = 6.8% +0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None +0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=1, model=0): 1, ProcessCoord(pipe=0, data=2, model=0): 2, ProcessCoord(pipe=0, data=3, model=0): 3, ProcessCoord(pipe=0, data=4, model=0): 4, ProcessCoord(pipe=0, data=5, model=0): 5, ProcessCoord(pipe=0, data=6, model=0): 6, ProcessCoord(pipe=0, data=7, model=0): 7, ProcessCoord(pipe=0, data=8, model=0): 8, ProcessCoord(pipe=0, data=9, model=0): 9, ProcessCoord(pipe=0, data=10, model=0): 10, ProcessCoord(pipe=0, data=11, model=0): 11, ProcessCoord(pipe=0, data=12, model=0): 12, ProcessCoord(pipe=0, data=13, model=0): 13, ProcessCoord(pipe=0, data=14, model=0): 14, ProcessCoord(pipe=0, data=15, model=0): 15, ProcessCoord(pipe=0, data=16, model=0): 16, ProcessCoord(pipe=0, data=17, model=0): 17, ProcessCoord(pipe=0, data=18, model=0): 18, ProcessCoord(pipe=0, data=19, model=0): 19, ProcessCoord(pipe=0, data=20, model=0): 20, ProcessCoord(pipe=0, data=21, model=0): 21, ProcessCoord(pipe=0, data=22, model=0): 22, ProcessCoord(pi +0: pe=0, data=23, model=0): 23, ProcessCoord(pipe=0, data=24, model=0): 24, ProcessCoord(pipe=0, data=25, model=0): 25, ProcessCoord(pipe=0, data=26, model=0): 26, ProcessCoord(pipe=0, data=27, model=0): 27, ProcessCoord(pipe=0, data=28, model=0): 28, ProcessCoord(pipe=0, data=29, model=0): 29, ProcessCoord(pipe=0, data=30, model=0): 30, ProcessCoord(pipe=0, data=31, model=0): 31, ProcessCoord(pipe=0, data=32, model=0): 32, ProcessCoord(pipe=0, data=33, model=0): 33, ProcessCoord(pipe=0, data=34, model=0): 34, ProcessCoord(pipe=0, data=35, model=0): 35, ProcessCoord(pipe=0, data=36, 
model=0): 36, ProcessCoord(pipe=0, data=37, model=0): 37, ProcessCoord(pipe=0, data=38, model=0): 38, ProcessCoord(pipe=0, data=39, model=0): 39, ProcessCoord(pipe=0, data=40, model=0): 40, ProcessCoord(pipe=0, data=41, model=0): 41, ProcessCoord(pipe=0, data=42, model=0): 42, ProcessCoord(pipe=0, data=43, model=0): 43, ProcessCoord(pipe=0, data=44, model=0): 44, ProcessCoord(pipe=0, data=45, model=0): 45, ProcessCoord(pipe=0, data=4 +0: 6, model=0): 46, ProcessCoord(pipe=0, data=47, model=0): 47, ProcessCoord(pipe=0, data=48, model=0): 48, ProcessCoord(pipe=0, data=49, model=0): 49, ProcessCoord(pipe=0, data=50, model=0): 50, ProcessCoord(pipe=0, data=51, model=0): 51, ProcessCoord(pipe=0, data=52, model=0): 52, ProcessCoord(pipe=0, data=53, model=0): 53, ProcessCoord(pipe=0, data=54, model=0): 54, ProcessCoord(pipe=0, data=55, model=0): 55, ProcessCoord(pipe=0, data=56, model=0): 56, ProcessCoord(pipe=0, data=57, model=0): 57, ProcessCoord(pipe=0, data=58, model=0): 58, ProcessCoord(pipe=0, data=59, model=0): 59, ProcessCoord(pipe=0, data=60, model=0): 60, ProcessCoord(pipe=0, data=61, model=0): 61, ProcessCoord(pipe=0, data=62, model=0): 62, ProcessCoord(pipe=0, data=63, model=0): 63} +0: [2023-03-14 09:43:31,967] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer +0: stage=0 layers=22 +0: 0: _to_float16 +0: 1: EmbeddingPipe +0: 2: +0: 3: ParallelTransformerLayerPipe +0: 4: ParallelTransformerLayerPipe +0: 5: ParallelTransformerLayerPipe +0: 6: ParallelTransformerLayerPipe +0: 7: ParallelTransformerLayerPipe +0: 8: ParallelTransformerLayerPipe +0: 9: ParallelTransformerLayerPipe +0: 10: ParallelTransformerLayerPipe +0: 11: ParallelTransformerLayerPipe +0: 12: ParallelTransformerLayerPipe +0: 13: ParallelTransformerLayerPipe +0: 14: ParallelTransformerLayerPipe +0: 15: ParallelTransformerLayerPipe +0: 16: ParallelTransformerLayerPipe +0: 17: ParallelTransformerLayerPipe +0: 18: undo +0: 19: MixedFusedLayerNorm +0: 
20: EmbeddingPipe +0: 21: float16_to_fp32 +0: loss: CrossEntropy +0: [2023-03-14 09:43:32,321] [INFO] [utils.py:827:see_memory_usage] After Building Model +0: [2023-03-14 09:43:32,322] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.28 GB CA 0.29 GB Max_CA 0 GB +0: [2023-03-14 09:43:32,322] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 34.19 GB, percent = 6.8% +0: setting training iterations to 0 +0: > learning rate decay style: cosine +0: DeepSpeed is enabled. +0: [2023-03-14 09:43:32,323] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +0: [2023-03-14 09:43:43,380] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +0: [2023-03-14 09:43:43,381] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +0: [2023-03-14 09:43:43,381] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +0: [2023-03-14 09:43:43,385] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +0: [2023-03-14 09:43:43,385] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +0: [2023-03-14 09:43:43,505] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer +0: [2023-03-14 09:43:43,506] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.29 GB CA 0.31 GB Max_CA 0 GB +0: [2023-03-14 09:43:43,506] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 34.87 GB, percent = 6.9% +2: ninja: no work to do. 
+2: Time to load utils op: 0.36609363555908203 seconds +2: Time to load utils op: 0.36615729331970215 seconds +2: Time to load utils op: 0.391995906829834 seconds +3: Time to load utils op: 0.39168334007263184 secondsTime to load utils op: 0.3656501770019531 seconds +3: +2: Time to load utils op: 0.36617350578308105 seconds +2: Time to load utils op: 0.3661160469055176 secondsTime to load utils op: 0.3661172389984131 seconds +2: +2: Time to load utils op: 0.36629676818847656 seconds +2: Time to load utils op: 0.3661386966705322 seconds +3: Time to load utils op: 0.3653411865234375 seconds +3: Time to load utils op: 0.3656024932861328 seconds +3: Time to load utils op: 0.3653695583343506 seconds +3: Time to load utils op: 0.36563706398010254 seconds +3: Time to load utils op: 0.36548709869384766 seconds +3: Time to load utils op: 0.3656935691833496 seconds +6: Time to load utils op: 0.35836362838745117 seconds +6: Time to load utils op: 0.3583827018737793 seconds +6: Time to load utils op: 0.35840702056884766 seconds +6: Time to load utils op: 0.3584251403808594 seconds +6: Time to load utils op: 0.3584301471710205 seconds +6: Time to load utils op: 0.35843753814697266 seconds +6: Time to load utils op: 0.3584415912628174 seconds +6: Time to load utils op: 0.3584475517272949 seconds +1: Time to load utils op: 0.36415910720825195 seconds +1: Time to load utils op: 0.3641843795776367 seconds +1: Time to load utils op: 0.3642001152038574 seconds +1: Time to load utils op: 0.36421799659729004 seconds +1: Time to load utils op: 0.3642246723175049 seconds +1: Time to load utils op: 0.3642303943634033 secondsTime to load utils op: 0.36424875259399414 seconds +1: +1: Time to load utils op: 0.36424851417541504 seconds +7: Time to load utils op: 0.36205434799194336 seconds +7: Time to load utils op: 0.39148831367492676 secondsTime to load utils op: 0.3620479106903076 seconds +7: +7: Time to load utils op: 0.36208224296569824 seconds +7: Time to load utils op: 
0.36211323738098145 seconds +7: Time to load utils op: 0.36202573776245117 seconds +7: Time to load utils op: 0.3620753288269043 secondsTime to load utils op: 0.36212587356567383 seconds +7: +0: Time to load utils op: 0.28010129928588867 seconds +0: Time to load utils op: 0.3712000846862793 seconds +0: Time to load utils op: 0.37123703956604004 seconds +0: Time to load utils op: 0.371227502822876 secondsTime to load utils op: 0.3713722229003906 seconds +0: +0: Time to load utils op: 0.3709721565246582 seconds +0: Time to load utils op: 0.3713674545288086 seconds +0: Time to load utils op: 0.3710489273071289 seconds +4: Time to load utils op: 0.3626668453216553 secondsTime to load utils op: 0.3626711368560791 seconds +4: +4: Time to load utils op: 0.3626880645751953 seconds +4: Time to load utils op: 0.36269378662109375 secondsTime to load utils op: 0.36269569396972656 seconds +4: +4: Time to load utils op: 0.36270928382873535 seconds +4: Time to load utils op: 0.36270594596862793 seconds +4: Time to load utils op: 0.36272215843200684 seconds +5: Time to load utils op: 0.3626680374145508 secondsTime to load utils op: 0.362673282623291 seconds +5: +5: Time to load utils op: 0.36267662048339844 seconds +5: Time to load utils op: 0.36267852783203125 secondsTime to load utils op: 0.3626821041107178 seconds +5: +5: Time to load utils op: 0.36269426345825195 seconds +5: Time to load utils op: 0.3627150058746338 seconds +5: Time to load utils op: 0.3627138137817383 seconds +0: [2023-03-14 09:43:43,897] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 +0: [2023-03-14 09:43:43,898] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.28 GB CA 0.31 GB Max_CA 0 GB +0: [2023-03-14 09:43:43,898] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 34.87 GB, percent = 6.9% +7: Time to load utils op: 0.0005505084991455078 seconds +7: Time to load utils op: 0.0005347728729248047 seconds +7: Time to load utils op: 0.0005471706390380859 seconds 
+7: Time to load utils op: 0.0005617141723632812 secondsTime to load utils op: 0.0006105899810791016 seconds +7: +7: Time to load utils op: 0.0005927085876464844 seconds +7: Time to load utils op: 0.0006022453308105469 seconds +7: Time to load utils op: 0.0003781318664550781 seconds +3: Time to load utils op: 0.0005044937133789062 seconds +3: Time to load utils op: 0.0004355907440185547 seconds +3: Time to load utils op: 0.0004570484161376953 secondsTime to load utils op: 0.0004646778106689453 seconds +3: +3: Time to load utils op: 0.0004410743713378906 seconds +3: Time to load utils op: 0.0004532337188720703 secondsTime to load utils op: 0.00043773651123046875 seconds +3: +3: Time to load utils op: 0.00042939186096191406 seconds +6: Time to load utils op: 0.0009584426879882812 seconds +6: Time to load utils op: 0.000843048095703125 seconds +5: Time to load utils op: 0.0009338855743408203 seconds +6: Time to load utils op: 0.0011134147644042969 seconds +5: Time to load utils op: 0.001188039779663086 secondsTime to load utils op: 0.0012679100036621094 seconds +5: +6: Time to load utils op: 0.0012965202331542969 seconds +6: Time to load utils op: 0.0011703968048095703 secondsTime to load utils op: 0.0011856555938720703 seconds +6: +6: Time to load utils op: 0.0012307167053222656 seconds +5: Time to load utils op: 0.0012006759643554688 seconds +5: Time to load utils op: 0.0012688636779785156 seconds +1: Time to load utils op: 0.000957489013671875 seconds +5: Time to load utils op: 0.0011134147644042969 seconds +5: Time to load utils op: 0.001161336898803711 seconds +4: Time to load utils op: 0.001008749008178711 seconds +6: Time to load utils op: 0.0011913776397705078 seconds +5: Time to load utils op: 0.0012629032135009766 seconds +1: Time to load utils op: 0.0008821487426757812 seconds +2: Time to load utils op: 0.0004773139953613281 seconds +1: Time to load utils op: 0.001125335693359375 seconds +4: Time to load utils op: 0.0010731220245361328 seconds +4: Time to 
load utils op: 0.0012085437774658203 seconds +2: Time to load utils op: 0.00046944618225097656 seconds +2: Time to load utils op: 0.00043201446533203125 seconds +1: Time to load utils op: 0.0011568069458007812 seconds +4: Time to load utils op: 0.0011870861053466797 seconds +4: Time to load utils op: 0.0012083053588867188 seconds +4: Time to load utils op: 0.0012748241424560547 seconds +4: Time to load utils op: 0.001230478286743164 seconds +2: Time to load utils op: 0.0004203319549560547 seconds +4: Time to load utils op: 0.001325368881225586 seconds +2: Time to load utils op: 0.0004894733428955078 seconds +2: Time to load utils op: 0.0004832744598388672 secondsTime to load utils op: 0.0004792213439941406 seconds +2: +1: Time to load utils op: 0.0012373924255371094 seconds +1: Time to load utils op: 0.00131988525390625 seconds +1: Time to load utils op: 0.0012927055358886719 seconds +2: Time to load utils op: 0.000492095947265625 seconds +1: Time to load utils op: 0.0012917518615722656 seconds +0: Time to load utils op: 0.00047135353088378906 seconds +0: Time to load utils op: 0.00047206878662109375 seconds +0: Time to load utils op: 0.0004832744598388672 secondsTime to load utils op: 0.00048232078552246094 secondsTime to load utils op: 0.0004634857177734375 seconds +0: +0: +0: Time to load utils op: 0.0004451274871826172 seconds +0: Time to load utils op: 0.0004169940948486328 seconds +0: [2023-03-14 09:43:44,070] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 +0: [2023-03-14 09:43:44,071] [INFO] [utils.py:828:see_memory_usage] MA 0.62 GB Max_MA 0.62 GB CA 0.82 GB Max_CA 1 GB +0: [2023-03-14 09:43:44,071] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.01 GB, percent = 7.0% +0: [2023-03-14 09:43:44,176] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 +0: [2023-03-14 09:43:44,177] [INFO] [utils.py:828:see_memory_usage] MA 0.62 GB Max_MA 0.62 GB CA 0.82 GB Max_CA 1 GB +0: [2023-03-14 09:43:44,177] [INFO] 
[utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.01 GB, percent = 7.0% +0: [2023-03-14 09:43:44,279] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 +0: [2023-03-14 09:43:44,280] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-03-14 09:43:44,280] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.01 GB, percent = 7.0% +0: [2023-03-14 09:43:44,380] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 +0: [2023-03-14 09:43:44,381] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-03-14 09:43:44,381] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.01 GB, percent = 7.0% +0: [2023-03-14 09:43:44,484] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 +0: [2023-03-14 09:43:44,484] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-03-14 09:43:44,484] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.01 GB, percent = 7.0% +0: [2023-03-14 09:43:44,584] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer +0: [2023-03-14 09:43:44,584] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-03-14 09:43:44,584] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.01 GB, percent = 7.0% +0: [2023-03-14 09:43:44,691] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer +0: [2023-03-14 09:43:44,691] [INFO] [utils.py:828:see_memory_usage] MA 0.85 GB Max_MA 0.85 GB CA 1.13 GB Max_CA 1 GB +0: [2023-03-14 09:43:44,691] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.02 GB, percent = 7.0% +0: [2023-03-14 09:43:44,793] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer +0: [2023-03-14 09:43:44,793] [INFO] [utils.py:828:see_memory_usage] MA 0.85 GB Max_MA 0.85 GB CA 1.13 GB Max_CA 1 GB +0: [2023-03-14 09:43:44,794] 
[INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 35.01 GB, percent = 7.0% +0: [2023-03-14 09:43:44,794] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +0: [2023-03-14 09:43:44,794] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler +0: [2023-03-14 09:43:44,794] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +0: [2023-03-14 09:43:44,794] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +0: [2023-03-14 09:43:44,794] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] activation_checkpointing_config { +0: "partition_activations": false, +0: "contiguous_memory_optimization": false, +0: "cpu_checkpointing": false, +0: "number_checkpoints": null, +0: "synchronize_checkpoint_boundary": false, +0: "profile": false +0: } +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] amp_enabled .................. False +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] amp_params ................... False +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] autotuning_config ............ 
{ +0: "enabled": false, +0: "start_step": null, +0: "end_step": null, +0: "metric_path": null, +0: "arg_mappings": null, +0: "metric": "throughput", +0: "model_info": null, +0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", +0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", +0: "overwrite": true, +0: "fast": true, +0: "start_profile_step": 3, +0: "end_profile_step": 5, +0: "tuner_type": "gridsearch", +0: "tuner_early_stopping": 5, +0: "tuner_num_trials": 50, +0: "model_info_path": null, +0: "mp_size": 1, +0: "max_train_batch_size": null, +0: "min_train_batch_size": 1, +0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, +0: "min_train_micro_batch_size_per_gpu": 1, +0: "num_tuning_micro_batch_sizes": 3 +0: } +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] bfloat16_enabled ............. True +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] comms_config ................. +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] communication_data_type ...... None +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] compression_config ........... 
{'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa +0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] curriculum_enabled ........... False +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] curriculum_params ............ False +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] dataloader_drop_last ......... False +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] disable_allgather ............ False +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] dump_state ................... False +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ 
bert.encoder.layer +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 +0: [2023-03-14 09:43:44,795] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] elasticity_enabled ........... False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] flops_profiler_config ........ { +0: "enabled": false, +0: "profile_step": 1, +0: "module_depth": -1, +0: "top_modules": 1, +0: "detailed": true, +0: "output_file": null +0: } +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] fp16_auto_cast ............... None +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] fp16_enabled ................. False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] global_rank .................. 0 +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 1 +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] load_universal_checkpoint .... False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] loss_scale ................... 1.0 +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] memory_breakdown ............. False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] monitor_config ............... 
+0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] nebula_config ................ { +0: "enabled": false, +0: "persistent_storage_path": null, +0: "persistent_time_interval": 100, +0: "num_of_version_in_retention": 2, +0: "enable_nebula_load": true, +0: "load_path": null +0: } +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] optimizer_name ............... None +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] optimizer_params ............. None +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] pld_enabled .................. False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] pld_params ................... False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] prescale_gradients ........... False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] scheduler_name ............... None +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] scheduler_params ............. None +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] sparse_attention ............. None +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] steps_per_print .............. 2000 +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] train_batch_size ............. 256 +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 4 +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] use_node_local_storage ....... False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] wall_clock_breakdown ......... 
False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] world_size ................... 64 +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] zero_enabled ................. False +0: [2023-03-14 09:43:44,796] [INFO] [config.py:1011:print] zero_optimization_stage ...... 
0 +0: [2023-03-14 09:43:44,796] [INFO] [config.py:996:print_user_config] json = { +0: "train_micro_batch_size_per_gpu": 4, +0: "train_batch_size": 256, +0: "gradient_clipping": 1.0, +0: "zero_optimization": { +0: "stage": 0 +0: }, +0: "bf16": { +0: "enabled": true +0: }, +0: "steps_per_print": 2.000000e+03, +0: "wall_clock_breakdown": false +0: } +0: Time to load utils op: 0.0006103515625 seconds +0: [2023-03-14 09:43:44,797] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=1 micro_batch_size=4 +0: [2023-03-14 09:43:44,853] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=22 [0, 22) STAGE_PARAMS=146525952 (146.526M) TOTAL_PARAMS=146525952 (146.526M) UNIQUE_PARAMS=146525952 (146.526M) +0: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+0: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+6: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +4: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +4: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +4: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+4: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +4: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +4: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +4: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +4: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+5: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +2: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +2: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +2: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+2: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +2: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +2: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +2: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+3: [2023-03-14 09:43:44,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +2: [2023-03-14 09:43:44,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. 
+7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,910] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. 
+7: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... 
+0: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+0: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. 
+5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +0: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +0: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +3: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. 
+6: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +6: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +6: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +6: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +7: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... 
+6: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +6: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +6: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt... +5: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. 
+1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +3: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +3: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. 
+2: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +3: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... 
+5: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +4: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. 
+3: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +3: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. 
+3: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +1: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... 
+1: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +3: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +2: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... 
+2: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +5: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... 
+3: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. +4: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/mp_rank_00_model_states.pt. 
+6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:44,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:44,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-03-14 09:43:45,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-03-14 09:43:45,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:45,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:45,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:45,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-03-14 09:43:45,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-03-14 09:43:45,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-03-14 09:43:45,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-03-14 09:43:45,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-03-14 09:43:45,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-03-14 09:43:45,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-03-14 09:43:45,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-03-14 09:43:45,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:45,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-03-14 09:43:45,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-03-14 09:43:45,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-03-14 09:43:45,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-03-14 09:43:45,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-03-14 09:43:45,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:45,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-03-14 09:43:45,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-03-14 09:43:45,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-03-14 09:43:45,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:45,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:45,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:45,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-03-14 09:43:45,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+3: [2023-03-14 09:43:45,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:45,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:45,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:45,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:45,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:45,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-03-14 09:43:45,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:45,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:45,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:45,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-03-14 09:43:45,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-03-14 09:43:45,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-03-14 09:43:45,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-03-14 09:43:45,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:45,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:45,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-03-14 09:43:45,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-03-14 09:43:45,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:45,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-03-14 09:43:45,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:45,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-03-14 09:43:45,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-03-14 09:43:45,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-03-14 09:43:45,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:45,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-03-14 09:43:45,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... 
+2: [2023-03-14 09:43:45,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-03-14 09:43:45,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-03-14 09:43:45,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-03-14 09:43:45,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-03-14 09:43:45,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-03-14 09:43:45,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-03-14 09:43:45,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:45,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:45,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:45,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:45,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-03-14 09:43:45,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:45,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:45,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:45,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:45,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:45,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:45,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-03-14 09:43:45,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-03-14 09:43:45,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-03-14 09:43:45,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-03-14 09:43:45,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-03-14 09:43:45,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-03-14 09:43:45,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-03-14 09:43:45,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-03-14 09:43:45,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-03-14 09:43:45,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-03-14 09:43:45,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-03-14 09:43:45,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-03-14 09:43:45,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-03-14 09:43:45,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-03-14 09:43:45,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-03-14 09:43:45,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-03-14 09:43:45,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-03-14 09:43:45,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-03-14 09:43:45,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-03-14 09:43:45,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-03-14 09:43:45,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-03-14 09:43:45,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-03-14 09:43:45,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-03-14 09:43:45,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-03-14 09:43:45,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-03-14 09:43:45,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. 
+3: [2023-03-14 09:43:45,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-03-14 09:43:45,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+2: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-03-14 09:43:45,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+2: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. 
+6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-03-14 09:43:45,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-03-14 09:43:45,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-03-14 09:43:45,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-03-14 09:43:45,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-03-14 09:43:45,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,242] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-03-14 09:43:45,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-03-14 09:43:45,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-03-14 09:43:45,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-03-14 09:43:45,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-03-14 09:43:45,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-03-14 09:43:45,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,246] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-03-14 09:43:45,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-03-14 09:43:45,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-03-14 09:43:45,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-03-14 09:43:45,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-03-14 09:43:45,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-03-14 09:43:45,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-03-14 09:43:45,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-03-14 09:43:45,246] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-03-14 09:43:45,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-03-14 09:43:45,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+3: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-03-14 09:43:45,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-03-14 09:43:45,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-03-14 09:43:45,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-03-14 09:43:45,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-03-14 09:43:45,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-03-14 09:43:45,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-03-14 09:43:45,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-03-14 09:43:45,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-03-14 09:43:45,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-03-14 09:43:45,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-03-14 09:43:45,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-03-14 09:43:45,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-03-14 09:43:45,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-03-14 09:43:45,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-03-14 09:43:45,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-03-14 09:43:45,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. 
+3: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. 
+3: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. 
+6: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-03-14 09:43:45,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-03-14 09:43:45,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-03-14 09:43:45,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+2: [2023-03-14 09:43:45,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-03-14 09:43:45,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-03-14 09:43:45,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-03-14 09:43:45,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. 
+3: [2023-03-14 09:43:45,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-03-14 09:43:45,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-03-14 09:43:45,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-03-14 09:43:45,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-03-14 09:43:45,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-03-14 09:43:45,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-03-14 09:43:45,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-03-14 09:43:45,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-03-14 09:43:45,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-03-14 09:43:45,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-03-14 09:43:45,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-03-14 09:43:45,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-03-14 09:43:45,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-03-14 09:43:45,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-03-14 09:43:45,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-03-14 09:43:45,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,479] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-03-14 09:43:45,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. 
+6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-03-14 09:43:45,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... 
+2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-03-14 09:43:45,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-03-14 09:43:45,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-03-14 09:43:45,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-03-14 09:43:45,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-03-14 09:43:45,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-03-14 09:43:45,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-03-14 09:43:45,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-03-14 09:43:45,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. 
+3: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-03-14 09:43:45,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-03-14 09:43:45,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+6: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+2: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-03-14 09:43:45,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-03-14 09:43:45,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-03-14 09:43:45,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-03-14 09:43:45,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-03-14 09:43:45,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-03-14 09:43:45,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-03-14 09:43:45,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-03-14 09:43:45,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-03-14 09:43:45,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-03-14 09:43:45,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-03-14 09:43:45,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-03-14 09:43:45,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-03-14 09:43:45,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-03-14 09:43:45,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. 
+3: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-03-14 09:43:45,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-03-14 09:43:45,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-03-14 09:43:45,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-03-14 09:43:45,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-03-14 09:43:45,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-03-14 09:43:45,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-03-14 09:43:45,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-03-14 09:43:45,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-03-14 09:43:45,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-03-14 09:43:45,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+2: [2023-03-14 09:43:45,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-03-14 09:43:45,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-03-14 09:43:45,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-03-14 09:43:45,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-03-14 09:43:45,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-03-14 09:43:45,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. 
+6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-03-14 09:43:45,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-03-14 09:43:45,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-03-14 09:43:45,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-03-14 09:43:45,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-03-14 09:43:45,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-03-14 09:43:45,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. 
+3: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. 
+6: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-03-14 09:43:45,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-03-14 09:43:45,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-03-14 09:43:45,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-03-14 09:43:45,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_08-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+3: [2023-03-14 09:43:45,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-03-14 09:43:45,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 09:43:45,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 09:43:45,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-03-14 09:43:45,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,819] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-03-14 09:43:45,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 09:43:45,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-03-14 09:43:45,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 09:43:45,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-03-14 09:43:45,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-03-14 09:43:45,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+2: [2023-03-14 09:43:45,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+2: [2023-03-14 09:43:45,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,890] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,891] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,891] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,891] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,891] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,892] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-03-14 09:43:45,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-03-14 09:43:45,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,894] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-03-14 09:43:45,895] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,896] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,896] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,896] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,897] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-03-14 09:43:45,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-03-14 09:43:45,898] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-03-14 09:43:45,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,899] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-03-14 09:43:45,901] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-03-14 09:43:45,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-03-14 09:43:45,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-03-14 09:43:45,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-03-14 09:43:45,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-03-14 09:43:45,902] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,902] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-03-14 09:43:45,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-03-14 09:43:45,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+3: [2023-03-14 09:43:45,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. 
+3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. 
+2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-03-14 09:43:45,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. 
+1: [2023-03-14 09:43:45,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. 
+6: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... 
+7: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. 
+4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-03-14 09:43:45,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+1: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. 
+0: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:45,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. 
+5: [2023-03-14 09:43:45,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-03-14 09:43:45,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:45,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. 
+7: [2023-03-14 09:43:45,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:45,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:45,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+0: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+6: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:45,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:45,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-03-14 09:43:45,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. 
+6: [2023-03-14 09:43:45,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:45,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-03-14 09:43:45,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:45,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:45,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:45,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:45,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+5: [2023-03-14 09:43:45,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:45,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:45,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:45,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-03-14 09:43:45,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:45,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:45,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:45,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-03-14 09:43:45,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:45,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:45,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:45,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:45,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:45,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:45,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:45,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:45,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:45,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:45,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+4: [2023-03-14 09:43:45,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:45,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:45,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:45,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:45,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:45,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:45,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:45,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+5: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-03-14 09:43:46,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+2: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+1: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+1: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+6: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+0: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-03-14 09:43:46,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+3: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-03-14 09:43:46,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+2: [2023-03-14 09:43:46,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-03-14 09:43:46,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-03-14 09:43:46,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+4: [2023-03-14 09:43:46,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-03-14 09:43:46,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-03-14 09:43:46,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+3: [2023-03-14 09:43:46,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-03-14 09:43:46,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-03-14 09:43:46,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+5: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+1: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-03-14 09:43:46,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-03-14 09:43:46,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-03-14 09:43:46,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-03-14 09:43:46,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... 
+6: [2023-03-14 09:43:46,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-03-14 09:43:46,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... 
+6: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... 
+2: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+0: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. 
+1: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-03-14 09:43:46,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... 
+0: [2023-03-14 09:43:46,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... 
+7: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-03-14 09:43:46,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-03-14 09:43:46,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... 
+7: [2023-03-14 09:43:46,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-03-14 09:43:46,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-03-14 09:43:46,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... 
+1: [2023-03-14 09:43:46,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-03-14 09:43:46,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-03-14 09:43:46,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-03-14 09:43:46,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-03-14 09:43:46,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+1: [2023-03-14 09:43:46,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-03-14 09:43:46,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... 
+3: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... 
+6: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+3: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-03-14 09:43:46,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-03-14 09:43:46,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+4: [2023-03-14 09:43:46,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+6: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... 
+3: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+5: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+5: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+0: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-03-14 09:43:46,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+3: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+4: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-03-14 09:43:46,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... 
+5: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-03-14 09:43:46,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-03-14 09:43:46,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+4: [2023-03-14 09:43:46,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-03-14 09:43:46,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-03-14 09:43:46,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-03-14 09:43:46,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-03-14 09:43:46,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+3: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-03-14 09:43:46,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-03-14 09:43:46,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-03-14 09:43:46,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+2: [2023-03-14 09:43:46,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+4: [2023-03-14 09:43:46,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-03-14 09:43:46,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-03-14 09:43:46,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-03-14 09:43:46,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-03-14 09:43:46,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+2: [2023-03-14 09:43:46,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+5: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+3: [2023-03-14 09:43:46,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-03-14 09:43:46,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-03-14 09:43:46,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-03-14 09:43:46,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-03-14 09:43:46,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-03-14 09:43:46,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_13-model_00-model_states.pt. 
+5: [2023-03-14 09:43:46,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-03-14 09:43:46,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-03-14 09:43:46,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+0: [2023-03-14 09:43:46,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+1: [2023-03-14 09:43:46,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+2: [2023-03-14 09:43:46,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. 
+4: [2023-03-14 09:43:46,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. 
+4: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+4: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+6: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. 
+0: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+5: [2023-03-14 09:43:46,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. 
+3: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. 
+1: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+2: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-03-14 09:43:46,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. 
+5: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+3: [2023-03-14 09:43:46,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-03-14 09:43:46,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt... 
+5: [2023-03-14 09:43:46,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-03-14 09:43:46,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-03-14 09:43:46,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-03-14 09:43:46,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... 
+4: [2023-03-14 09:43:46,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-03-14 09:43:46,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-03-14 09:43:46,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-03-14 09:43:46,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... 
+4: [2023-03-14 09:43:46,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-03-14 09:43:46,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... 
+6: [2023-03-14 09:43:46,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-03-14 09:43:46,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-03-14 09:43:46,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-03-14 09:43:46,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-03-14 09:43:46,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-03-14 09:43:46,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-03-14 09:43:46,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_14-model_00-model_states.pt. 
+3: [2023-03-14 09:43:46,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... 
+3: [2023-03-14 09:43:46,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-03-14 09:43:46,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-03-14 09:43:46,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+1: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... 
+2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... 
+2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-03-14 09:43:46,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... 
+3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... 
+6: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-03-14 09:43:46,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-03-14 09:43:46,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-03-14 09:43:46,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-03-14 09:43:46,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... 
+0: [2023-03-14 09:43:46,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-03-14 09:43:46,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... 
+5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-03-14 09:43:46,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-03-14 09:43:46,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-03-14 09:43:46,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+4: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... 
+7: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-03-14 09:43:46,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+5: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-03-14 09:43:46,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-03-14 09:43:46,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-03-14 09:43:46,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-03-14 09:43:46,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... 
+5: [2023-03-14 09:43:46,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... 
+0: [2023-03-14 09:43:46,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-03-14 09:43:46,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt... 
+1: [2023-03-14 09:43:46,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... 
+1: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-03-14 09:43:46,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-03-14 09:43:46,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-03-14 09:43:46,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... 
+2: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. 
+3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... 
+3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... 
+4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-03-14 09:43:46,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. 
+0: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... 
+5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. 
+0: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... 
+7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... 
+4: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. 
+3: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-03-14 09:43:46,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... 
+2: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... 
+5: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... 
+0: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-03-14 09:43:46,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... 
+3: [2023-03-14 09:43:46,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-03-14 09:43:46,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. 
+0: [2023-03-14 09:43:46,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-03-14 09:43:46,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-03-14 09:43:46,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... 
+1: [2023-03-14 09:43:46,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... 
+1: [2023-03-14 09:43:46,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. 
+1: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-03-14 09:43:46,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... 
+1: [2023-03-14 09:43:46,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. 
+1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... 
+1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... 
+4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. 
+1: [2023-03-14 09:43:46,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... 
+7: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. 
+4: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. 
+5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. 
+5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-03-14 09:43:46,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... 
+0: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... 
+0: [2023-03-14 09:43:46,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-03-14 09:43:46,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-03-14 09:43:46,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-03-14 09:43:46,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-03-14 09:43:46,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-03-14 09:43:46,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,518] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. 
+1: [2023-03-14 09:43:46,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+1: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-03-14 09:43:46,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. 
+1: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... 
+1: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... 
+1: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... 
+4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... 
+4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+4: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-03-14 09:43:46,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-03-14 09:43:46,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... 
+3: [2023-03-14 09:43:46,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. 
+3: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-03-14 09:43:46,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... 
+6: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... 
+3: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... 
+5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... 
+5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+2: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... 
+3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... 
+6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+6: [2023-03-14 09:43:46,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-03-14 09:43:46,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-03-14 09:43:46,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-03-14 09:43:46,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +0: > overriding learning rate value to 0.0002 +0: > overriding minimum learning rate value to 2e-05 +0: > overriding warmup iterations value to 0 +0: > overriding total number of iterations value to 1 +0: > overriding decay style value to cosine +1: [2023-03-14 09:43:46,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-03-14 09:43:46,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-03-14 09:43:46,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-03-14 09:43:46,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... 
+1: [2023-03-14 09:43:46,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-03-14 09:43:46,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-03-14 09:43:46,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-03-14 09:43:46,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +4: [2023-03-14 09:43:46,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-03-14 09:43:46,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-03-14 09:43:46,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-03-14 09:43:46,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-03-14 09:43:46,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-03-14 09:43:46,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... 
+4: [2023-03-14 09:43:46,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-03-14 09:43:46,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +3: [2023-03-14 09:43:46,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-03-14 09:43:46,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-03-14 09:43:46,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-03-14 09:43:46,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-03-14 09:43:46,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-03-14 09:43:46,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-03-14 09:43:46,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-03-14 09:43:46,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... 
+2: [2023-03-14 09:43:46,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-03-14 09:43:46,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-03-14 09:43:46,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-03-14 09:43:46,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-03-14 09:43:46,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-03-14 09:43:46,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-03-14 09:43:46,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-03-14 09:43:46,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +5: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... 
+5: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +0: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... 
+0: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +6: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... 
+6: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-03-14 09:43:46,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +7: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-03-14 09:43:46,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... 
+7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. 
+7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-03-14 09:43:46,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-03-14 09:43:46,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-03-14 09:43:46,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-03-14 09:43:46,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-03-14 09:43:46,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-03-14 09:43:46,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... 
+7: [2023-03-14 09:43:46,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-03-14 09:43:46,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-03-14 09:43:46,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +1: [2023-03-14 09:43:46,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-03-14 09:43:46,605] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 8 +0: [2023-03-14 09:43:46,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-03-14 09:43:46,607] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 5 +1: [2023-03-14 09:43:46,607] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 8 +0: [2023-03-14 09:43:46,609] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 5 +3: [2023-03-14 09:43:46,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 09:43:46,618] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 29 +3: [2023-03-14 09:43:46,620] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 29 +4: [2023-03-14 09:43:46,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-03-14 09:43:46,624] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 37 +4: [2023-03-14 09:43:46,626] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 37 +3: [2023-03-14 09:43:46,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-03-14 09:43:46,626] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 31 +2: [2023-03-14 09:43:46,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-03-14 09:43:46,627] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 16 +7: [2023-03-14 09:43:46,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. 
+7: [2023-03-14 09:43:46,628] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 62 +3: [2023-03-14 09:43:46,628] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 31 +2: [2023-03-14 09:43:46,628] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 16 +6: [2023-03-14 09:43:46,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-03-14 09:43:46,630] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 55 +7: [2023-03-14 09:43:46,630] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 62 +0: [2023-03-14 09:43:46,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-03-14 09:43:46,630] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 2 +6: [2023-03-14 09:43:46,631] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 55 +0: [2023-03-14 09:43:46,632] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 2 +5: [2023-03-14 09:43:46,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 09:43:46,635] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 43 +5: [2023-03-14 09:43:46,637] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 43 +6: [2023-03-14 09:43:46,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-03-14 09:43:46,637] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 50 +6: [2023-03-14 09:43:46,639] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 50 +5: [2023-03-14 09:43:46,642] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-03-14 09:43:46,642] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 41 +5: [2023-03-14 09:43:46,644] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 41 +0: [2023-03-14 09:43:46,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-03-14 09:43:46,646] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 7 +0: [2023-03-14 09:43:46,648] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 7 +1: [2023-03-14 09:43:46,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 09:43:46,648] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 10 +1: [2023-03-14 09:43:46,650] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 10 +1: [2023-03-14 09:43:46,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-03-14 09:43:46,654] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 14 +1: [2023-03-14 09:43:46,655] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 14 +4: [2023-03-14 09:43:46,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-03-14 09:43:46,659] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 38 +4: [2023-03-14 09:43:46,661] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 38 +7: [2023-03-14 09:43:46,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-03-14 09:43:46,664] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 58 +7: [2023-03-14 09:43:46,666] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 58 +4: [2023-03-14 09:43:46,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 09:43:46,667] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 39 +7: [2023-03-14 09:43:46,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-03-14 09:43:46,668] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 57 +4: [2023-03-14 09:43:46,668] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 39 +6: [2023-03-14 09:43:46,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-03-14 09:43:46,668] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 48 +7: [2023-03-14 09:43:46,669] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 57 +6: [2023-03-14 09:43:46,670] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 48 +2: [2023-03-14 09:43:46,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-03-14 09:43:46,673] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 18 +0: [2023-03-14 09:43:46,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 09:43:46,674] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 6 +3: [2023-03-14 09:43:46,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +2: [2023-03-14 09:43:46,675] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 18 +3: [2023-03-14 09:43:46,675] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 28 +0: [2023-03-14 09:43:46,675] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 6 +3: [2023-03-14 09:43:46,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +3: [2023-03-14 09:43:46,676] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 24 +2: [2023-03-14 09:43:46,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +3: [2023-03-14 09:43:46,677] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 28 +6: [2023-03-14 09:43:46,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 09:43:46,677] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 17 +6: [2023-03-14 09:43:46,677] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 49 +3: [2023-03-14 09:43:46,678] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 24 +2: [2023-03-14 09:43:46,679] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 17 +6: [2023-03-14 09:43:46,679] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 49 +3: [2023-03-14 09:43:46,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-03-14 09:43:46,681] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 26 +0: [2023-03-14 09:43:46,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +3: [2023-03-14 09:43:46,683] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 26 +0: [2023-03-14 09:43:46,683] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 3 +0: [2023-03-14 09:43:46,684] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 3 +0: [2023-03-14 09:43:46,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. 
+0: [2023-03-14 09:43:46,685] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 0 +5: [2023-03-14 09:43:46,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-03-14 09:43:46,685] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 47 +0: [2023-03-14 09:43:46,686] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 0 +0: could not find arguments in the checkpoint ... +0: checkpoint version 3.0 +5: [2023-03-14 09:43:46,686] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 47 +5: [2023-03-14 09:43:46,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +6: [2023-03-14 09:43:46,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +5: [2023-03-14 09:43:46,691] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 44 +6: [2023-03-14 09:43:46,691] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 52 +1: [2023-03-14 09:43:46,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. 
+1: [2023-03-14 09:43:46,692] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 12 +6: [2023-03-14 09:43:46,693] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 52 +5: [2023-03-14 09:43:46,693] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 44 +1: [2023-03-14 09:43:46,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-03-14 09:43:46,693] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 9 +1: [2023-03-14 09:43:46,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-03-14 09:43:46,694] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 12 +1: [2023-03-14 09:43:46,694] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 15 +5: [2023-03-14 09:43:46,694] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 09:43:46,695] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 45 +1: [2023-03-14 09:43:46,695] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 9 +1: [2023-03-14 09:43:46,696] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 15 +5: [2023-03-14 09:43:46,696] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 45 +0: [2023-03-14 09:43:46,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-03-14 09:43:46,697] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 4 +1: [2023-03-14 09:43:46,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-03-14 09:43:46,698] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 11 +0: [2023-03-14 09:43:46,699] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 4 +1: [2023-03-14 09:43:46,700] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 11 +2: [2023-03-14 09:43:46,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-03-14 09:43:46,701] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 20 +3: [2023-03-14 09:43:46,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 09:43:46,701] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 27 +2: [2023-03-14 09:43:46,702] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 20 +3: [2023-03-14 09:43:46,703] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 27 +7: [2023-03-14 09:43:46,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-03-14 09:43:46,703] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 60 +7: [2023-03-14 09:43:46,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-03-14 09:43:46,704] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 56 +7: [2023-03-14 09:43:46,705] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 60 +7: [2023-03-14 09:43:46,706] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 56 +7: [2023-03-14 09:43:46,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-03-14 09:43:46,706] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 59 +6: [2023-03-14 09:43:46,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 09:43:46,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +6: [2023-03-14 09:43:46,707] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 53 +2: [2023-03-14 09:43:46,707] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 19 +4: [2023-03-14 09:43:46,707] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-03-14 09:43:46,707] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 36 +7: [2023-03-14 09:43:46,708] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 59 +6: [2023-03-14 09:43:46,708] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 53 +2: [2023-03-14 09:43:46,709] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 19 +4: [2023-03-14 09:43:46,709] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 36 +4: [2023-03-14 09:43:46,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-03-14 09:43:46,710] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 32 +3: [2023-03-14 09:43:46,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. 
+3: [2023-03-14 09:43:46,711] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 25 +4: [2023-03-14 09:43:46,711] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 32 +2: [2023-03-14 09:43:46,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-03-14 09:43:46,712] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 22 +7: [2023-03-14 09:43:46,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +3: [2023-03-14 09:43:46,713] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 25 +7: [2023-03-14 09:43:46,713] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 61 +4: [2023-03-14 09:43:46,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-03-14 09:43:46,714] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 35 +2: [2023-03-14 09:43:46,714] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 22 +7: [2023-03-14 09:43:46,714] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 61 +4: [2023-03-14 09:43:46,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 09:43:46,714] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 34 +4: [2023-03-14 09:43:46,715] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 35 +0: [2023-03-14 09:43:46,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-03-14 09:43:46,716] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 1 +5: [2023-03-14 09:43:46,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-03-14 09:43:46,716] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 46 +4: [2023-03-14 09:43:46,717] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 34 +0: [2023-03-14 09:43:46,717] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 1 +7: [2023-03-14 09:43:46,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-03-14 09:43:46,718] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 63 +5: [2023-03-14 09:43:46,718] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 46 +2: [2023-03-14 09:43:46,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. 
+2: [2023-03-14 09:43:46,718] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 23 +7: [2023-03-14 09:43:46,719] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 63 +5: [2023-03-14 09:43:46,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-03-14 09:43:46,719] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 40 +2: [2023-03-14 09:43:46,720] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 23 +5: [2023-03-14 09:43:46,721] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 40 +6: [2023-03-14 09:43:46,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-03-14 09:43:46,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-03-14 09:43:46,722] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 51 +6: [2023-03-14 09:43:46,722] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 54 +6: [2023-03-14 09:43:46,723] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 51 +6: [2023-03-14 09:43:46,723] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 54 +5: [2023-03-14 09:43:46,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. 
+5: [2023-03-14 09:43:46,728] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 42 +3: [2023-03-14 09:43:46,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-03-14 09:43:46,729] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 30 +5: [2023-03-14 09:43:46,730] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 42 +3: [2023-03-14 09:43:46,730] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 30 +2: [2023-03-14 09:43:46,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-03-14 09:43:46,738] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 21 +2: [2023-03-14 09:43:46,740] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 21 +1: [2023-03-14 09:43:46,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-03-14 09:43:46,742] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 13 +1: [2023-03-14 09:43:46,743] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 13 +4: [2023-03-14 09:43:46,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b100m/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. 
+4: [2023-03-14 09:43:46,744] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 33 +4: [2023-03-14 09:43:46,745] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 33 +0: successfully loaded checkpoint from checkpoints_146m14b100m at iteration 0 +7: time (ms) | load-checkpoint: 1897.37 +0: estimated model parameters: 0.146525952 +0: estimated model parameters without embeddings: 0.106319616 +0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-03-14 09:43:47 +0: > building train, validation, and test datasets ... +0: > datasets target sizes (minimum size): +0: train: 1 +0: validation: 25600 +0: test: 25600 +0: > building train, validation, and test datasets for GPT ... +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.050718 seconds +0: number of documents: 28730568 +0: > dataset split: +0: train: +0: document indices in [0, 28730568) total of 28730568 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.088 seconds +0: total number of samples: 6713794 +0: total number of epochs: 1 +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... 
+0: > finished creating indexed dataset in 0.012650 seconds +0: number of documents: 364608 +0: > dataset split: +0: validation: +0: document indices in [0, 364608) total of 364608 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.079 seconds +0: total number of samples: 84978 +0: total number of epochs: 1 +0: > finished creating GPT datasets ... +0: [after dataloaders are built] datetime: 2023-03-14 09:44:02 +0: done with setup ... +0: training ... +7: time (ms) | model-and-optimizer-setup: 17538.74 | train/valid/test-data-iterators-setup: 14676.46 +0: [after training is done] datetime: 2023-03-14 09:44:02 +7: ----------------------------------------------------------------------------------------------------------------- +7: validation loss at the end of training for val data | lm loss value: 3.801428E+00 | lm loss PPL: 4.476506E+01 | +7: ----------------------------------------------------------------------------------------------------------------- +END 3302895: Tue 14 Mar 2023 09:44:26 AM EET diff --git a/146m14b100m/sbatch_146m14b100m.sh b/146m14b100m/sbatch_146m14b100m.sh new file mode 100644 index 0000000000000000000000000000000000000000..8f6f105a5d074455e938ec67ef5b6292f6a352d7 --- /dev/null +++ b/146m14b100m/sbatch_146m14b100m.sh @@ -0,0 +1,162 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 
+#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=146m14b100m + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +# DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train100m.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_1B5_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_140M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 11300000000 +# -> Samples: 5517578 +TRAIN_SAMPLES=5_517_578 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 55_176 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN 
\ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/146m14b100m/sbatch_146m14b100mval.sh b/146m14b100m/sbatch_146m14b100mval.sh new file mode 100644 index 0000000000000000000000000000000000000000..17691c4acacbd1d4560fddde46ae1c7420d08e08 --- /dev/null +++ 
b/146m14b100m/sbatch_146m14b100mval.sh @@ -0,0 +1,167 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=146m14b100mval +VARIANT_CKPT=146m14b100m + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT_CKPT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +# DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train14b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_140M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 11300000000 +# -> Samples: 5517578 +TRAIN_SAMPLES=1 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 
0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 0 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + --no-load-optim \ + --reset-progress \ + --override-lr-scheduler \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1 \ + --eval-iters 100 \ + --eval-only true \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + 
+echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/146m14b100m/tensorboard_146m14b100m/events.out.tfevents.1678742252.nid005907.24961.0 b/146m14b100m/tensorboard_146m14b100m/events.out.tfevents.1678742252.nid005907.24961.0 new file mode 100644 index 0000000000000000000000000000000000000000..4471dcdc2bab29b04df5f0742da955694d3e6843 --- /dev/null +++ b/146m14b100m/tensorboard_146m14b100m/events.out.tfevents.1678742252.nid005907.24961.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c49d7da5a9a46a5c7b18357bc3dfd176aaffc124c0eec5373a18b313de862742 +size 38441522 diff --git a/146m14b100m/tensorboard_146m14b100mval/events.out.tfevents.1678779785.nid005907.106752.0 b/146m14b100m/tensorboard_146m14b100mval/events.out.tfevents.1678779785.nid005907.106752.0 new file mode 100644 index 0000000000000000000000000000000000000000..b83cd61731f7c5ebe4488a3c90fd1f934047a5b0 --- /dev/null +++ b/146m14b100m/tensorboard_146m14b100mval/events.out.tfevents.1678779785.nid005907.106752.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79df0762e1b2704b149386607337b5280cded6efa620593f3c4c8342654bacf8 +size 980 diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_0.json b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..222851d9c6431db0ff797e51d70b66d5951589b4 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.22883262521963457, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural 
language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.027812784355662805}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05586003903362003, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001210122174269387}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.24242206204772987, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00406311577145891}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08597342838383007, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016706133404280645}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.02681915167649114, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007667503131708552}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.1198429847566052, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002900245290872093}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.04138231986575359, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010877957797931874}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.054558298468825164, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011547348959483191}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.23869961861095434, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004019678329876834}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08417220195877628, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016113500400264057}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.053912329140769884, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011465126361564196}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.23448028224096124, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0038290152808916687}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08303053627222617, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015828450619681193}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_1.json b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..17f108e414511d50601fe383675c5d3fc308eddd --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.15575574742170292, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.017955848150539994}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05111535113081184, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0010569639568656138}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.24170102692127848, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003887710100922636}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07979284621488447, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014891690285340458}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.02319539268897493, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006528037703471886}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10947015615742409, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0027377316691087903}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03623174370339374, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009530237429355749}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.050193570501271496, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001028472018467999}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.23749645057907356, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0038081908356859396}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.0784179897604357, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014562658859192477}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.049369870759019566, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010133278968197307}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2327791385209823, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003684550808421332}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07704351061583765, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001426638202203612}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_2.json b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..fd148300f5868e4233c0f21258f6823e800c62b3 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.11268067835710893, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.016117537835788783}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05051442816287885, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011508877054317382}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.23644760012414331, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003674644463876482}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07825326885860989, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001440978853110425}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.022522751858263527, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006351580301931131}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10704237700401503, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0026405410434178767}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.035231660164805785, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009297500296840882}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.04983511890393805, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011358261496601876}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.23226685909234288, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0035796541421674885}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07721547440153723, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014241275115813303}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04910718457688571, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011242886510810662}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.22930984617447991, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003533204831294875}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07603991744725928, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013988704181332936}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_3.json b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..b3ff93b5902d31c0c55b97b3c75229dde7bd3661 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.10023958520271987, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.00971716873348663}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05067153957719335, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001191666778337723}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.23959949696648097, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003667056307063945}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07829031027709862, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001430784109255991}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.02236115249874777, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006367547266607817}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10826670979300317, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002634480071726296}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03498308879465038, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009183882717060898}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.049726423469511824, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011482147824323167}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.235074424902374, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003581208119051607}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07694344069935595, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013971914265873296}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04906977784065461, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011457059871568674}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.23202608921361742, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0035573781836356197}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07585440271342676, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013846552471020674}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_4.json b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..6b9d85426cecdb51885b46707e94e5dd647e8c16 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.1250569767383199, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.021628601699908004}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.051227713247576225, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0010795326291589034}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.24913049586114253, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0037758792838945734}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07994546949564424, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014211193648653705}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.023147509973142556, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007025722449267816}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.11505322393998348, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002750803567815038}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03616407362539412, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009414194120798864}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.050449943064731946, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010510818215242375}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.24470086974952332, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0036741476280504953}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07875849123282895, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013979148156232973}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04987064991309479, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010470276141344049}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2417182789397841, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036483215475860884}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07780048005428908, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013878222359377333}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_5.json b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..86b055819004a1858e5829f20fc694f32f2ab518 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.13005434642499197, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.010305698888010667}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05083504737121614, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0009791389486581562}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2513764601791396, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0038227866407648994}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08007155614694793, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0013739824517440447}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.022503923501631235, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006219870748423658}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.11458449377422993, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00275472174018853}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03554619045223363, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009074930456849103}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.049876244862291454, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0009596753537288436}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2453692869402309, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003667684923694637}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07854454523551002, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013506756461914645}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04940531544011753, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0009556461222745182}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.24369845054346978, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036791477193529676}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07777039763023255, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013383824495560735}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_0.json b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..e427e0bb3f193791b3c6af9e2d876c5b34d1572e --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.08918754448453994, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015932292071292517}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.13575112269881404, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0021807035000954386}, {"task_name": 
"GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.09912682070533965, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001568429644016599}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.010880211822211733, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0004652941571441064}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.017760638377981565, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0008339736615674986}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.012242312744229137, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005067795444742174}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.07960812665447552, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013419368478906097}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.12335393646166343, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001962960490931355}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08904469475289996, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013408271016179848}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.08286720839573229, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014710250802756778}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.1265759930387205, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0020220996079844036}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.09217605866260052, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": 
"en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014433103845410448}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.49687897813178417, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.028753272849352945}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_1.json b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..5061b33e12b2dc56aa9597926cbe6bf0476bbfd4 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.09108345639125676, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013414051332403126}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": 
"tldr_en", "rouge1_recall": 0.14374521382155062, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0019373235502897549}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.10341097418229313, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0013506246569402033}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.006366424495184133, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00033879797030736587}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.011055391527813261, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.000720932463146685}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0073507199277508825, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, 
"comment": "", "rouge2_fmeasure_stderr": 0.00038900605048980045}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.07176268588179166, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0009370468184145725}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.11646644523105884, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0014990807345932596}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08213432595813003, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0009442571910773332}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.0861238381653085, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012500535942948073}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.13658818770137882, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": 
"d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0018342040331144193}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.09789009627397521, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0012554566404861117}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.4209954239393304, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03176221976327906}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_2.json b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..c7281aca00dc356ffb1926acf50c267046a00297 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 
0.08962886049037526, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014557995887673056}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.13142388510558053, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0018322115687655425}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.09759598981071996, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0013580795670625858}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.006459794912288849, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00039736825277010513}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.009646265425710393, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", 
"rouge2_recall_stderr": 0.0005575014671908036}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.007001951186011981, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003928300541809073}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.07428847306130615, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011113706498795289}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1117787565135024, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001507759191681407}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08154716868365836, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0010323345748373103}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.08399764483535616, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": 
"d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013459142554786106}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.12372819672301809, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.001704125865736863}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.0915845046326089, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001250008944008003}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.3662014163092573, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03512932309274555}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git 
a/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_3.json b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..42390deb686e7fb3998b401c83cd0168db46e27c --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.08885799809368973, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019348327165460144}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.11654077328550566, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0021991705748568326}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.087537415147445, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015354531166868524}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.009914622438569649, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006522325398007237}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.013569391618498709, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0008559625787159471}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.009383113142976338, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0004958980836726872}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.07476162220353817, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0015778653051055946}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1005389837409404, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0018902936583710462}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.07425320055271334, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": 
null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012438089865080164}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.08282976799475586, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018070176003999415}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.10903150651704524, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0020555713704425685}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.08158571031468127, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014179098384736939}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.558567693535546, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.059328676012531334}], "config": {"model": "hf-causal", "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_4.json b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d2815a4056a3a886723775c7830375ba82356bcf --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.03600497009970023, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017041914474905083}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.0405939531186821, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0016845502061290764}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.03122194580188561, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0012441428977843587}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.00501859392689466, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000548209402419891}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.005490853123571033, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0006056703124515839}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.003984021707078218, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003464773884452124}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.03120550445864107, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014566603437604745}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.03575355085817033, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", 
"subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0014586794204117094}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.027162299961073107, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0010492438955382582}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.03366739502631602, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016160090202211065}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.037710615384056095, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0015687838454172922}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.028997162012914524, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001157930832064929}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", 
"bleu": 0.2666315699216447, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04602526809695048}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_5.json b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..84cd1df0b364f00469a6d0146f5adfd98350c4eb --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.006709044851699656, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0008832342566561942}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.006512638750272697, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": 
true, "comment": "", "rouge1_recall_stderr": 0.0007563435499462167}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.00535516413631097, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0005989788021678713}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.001281837491890342, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000304608742717695}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0011010851742601637, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0002253109954852027}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0009343759708146302, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00019124522298799218}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.005724582318965228, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": 
"d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0007159612442775775}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.00580938066432881, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0006776468535233207}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.004672160110494701, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0005068592712515271}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.006363502163931598, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0008361966843529074}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.00622891484597148, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0007333253589288016}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 
0.0050875569393090715, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00057132173191095}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 7.886206328273586e-06, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.221361669315733e-05}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_0.json b/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..2fac516cbba4388c86f5f7134f88a548995b0e70 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 0.8343403861560295, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a 
restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08848890438051935}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.04482003180956012, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001997077263135176}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.05496626728471005, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0019739634808670177}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.04331398539056834, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015510481823475862}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.010968928668382354, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006660784749433778}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.013837637563125265, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0006656353732837446}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.010736674908829128, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005134871607341201}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.04082014014811183, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017013420099803993}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.05346730136724671, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019401768471861783}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.04135106793261327, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014779152643788263}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.03393512996088468, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001554141532768072}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.041493274650743935, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0015131168604173573}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.03267655491671789, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0011867994802981648}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_1.json b/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..5fafd29b09318b59cb87fc98e98603246ac866a7 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 2.9406196061775116, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.10258259522175316}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.2787618289181687, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005384508455643661}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.19799693389239711, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003182542201890721}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.18693446462920227, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002787464540486619}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.1437328248773931, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.005092642043712837}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.05733865153595204, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001383897371483806}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.05706123212585479, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013015405321589112}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.2302703496004102, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.005102072689082256}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.15449193654318977, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002518846268029969}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.14444789684107232, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0021086022369478444}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.24427645994280608, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.005221824942580713}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.1627325478139627, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026692987289094598}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.1553009809575637, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0023739934749666684}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_2.json b/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..46648fd65057ca9aa79bf185db0d12dccd688da2 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 3.0869989165891227, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.16013219623315433}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.3978056637157276, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.006383333506870748}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.19769999489666457, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003338189945298019}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.21814903833369859, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0031866040091908464}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.24931076985350456, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.006415148520407114}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.07401280693073875, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016105308975029838}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.08456553732934603, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016022980395205303}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.33763473031412045, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.006269179792430952}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.14877296254487207, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024783679014017384}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.1673148071155831, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002388385786508644}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.357819032438701, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.006300016081780083}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.16548809371217418, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0028048620055066233}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.18463628783286154, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002693460028052387}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_3.json b/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..b9a3a84542f10decb98e3398cfbf0751baf8be3e --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 3.7011916246059053, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.14593162958819425}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.41822296870814335, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.006460520453011632}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.2071298534116268, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0034383519555611984}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.23033091532940253, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0032924392197765163}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2702971908568115, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00643370737283098}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.08487129021823478, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00173547236263484}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.09792307241998234, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0017582063387383436}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3572174834274431, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.006354476895716739}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.1574631215849211, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002552084505977488}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.17881874820814597, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0024988437464002213}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.37863570607432673, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.006392889923077396}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.17478692469025933, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002894063899840589}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.19693757021044234, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002816228141853166}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_4.json b/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..1b7ab2876d8941e1f76bbe11fe5f0717b4d22a8f --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 4.378358579277868, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.15272962158453546}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.45882713034371453, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.006415593061369954}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.2255093763170198, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003436954476710523}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.25232175927064066, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003252006309486809}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.299477006298075, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0065384661124707425}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.09678212220073933, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018066974497821132}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.11225233292150326, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018233487430859975}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.39417067582169574, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.006410266792021505}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.17324797729857383, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002580532804445032}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.19796718724270523, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002503553099442586}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.41668133719539296, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.006408889836300579}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.19157341143212034, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002922660612765926}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.2170954167942643, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002802720816613775}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_5.json b/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..48c6c3241d996e37c031c4a4ca9555d1cffa3b53 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 4.890257220178811, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13420785403183202}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.475398003113497, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.006327908810294356}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.24024724261621913, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0034704603090637783}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.2681436632992601, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.003258641900873387}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.31255952474835047, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.006463307595299056}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.10772173878741007, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018408748834838522}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.12437458519230632, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0018508476727143834}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.40683490044030046, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.006313377793533003}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.18544055623635025, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0026168715932805803}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.21086184259770677, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002497969164716478}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4295272827032871, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.006304887170894177}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.20394125936151544, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0029282150904557346}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.23024299776500148, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0027826284448924806}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_0.json b/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..6c2624bfab8f1d0eb4728bcba781c8fcf200eb10 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.1022798973400133, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016195028484036645}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2472257110564639, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0036244015531403696}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.14196393966710072, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002104608543541609}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.01730961638211292, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007982969706574101}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.043897826329439435, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0019301619560314596}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.02440112287257367, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010880062384939326}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.08904777578480996, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013272362515087367}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", 
"rougeL_recall": 0.2169745636685914, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003060682824750885}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.12383517753988749, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017149719617583993}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0783856177341214, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012799850206698408}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.1915896888630931, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0029773166596938527}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.10898759142531142, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", 
"rougeLsum_fmeasure_stderr": 0.001653332454666073}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.9016790167616279, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08554255656260222}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_1.json b/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..d81bfa79a9c1040f4fa620466481cb1001ef1e1d --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08983663259289545, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014656640724220792}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.22526078131503421, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0034677534855223576}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.12680695998689423, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001991458093221162}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.012790758078394766, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006274060760347892}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.03314630221773423, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016600656887166795}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.018226182677564602, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008914321799827453}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07859662487412902, 
"fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012275599440022234}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.19823099261868493, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002945509989290099}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11108456989593447, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016654771979817452}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06928351933199965, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011286042016073875}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.1759900951750178, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002828460597285845}, 
{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09809900796895703, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015505955006710195}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.6174225365598259, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05127643687143349}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_2.json b/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..c6b321eec9eaaad0fdf887ad4a53ecc83722f84b --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08766778881774849, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: 
{{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014891572798897187}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.22075516476448948, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00347829704174308}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.12387534340787068, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002014018603012206}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.013027956332330589, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000726229100756286}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.03360633434332384, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017823718475256693}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.018525660614874147, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": 
"", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010059837278380754}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.077990231219861, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012548203942130344}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.19754633749547196, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0029825381740120733}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11034857045454226, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001695778207988071}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06719420386174171, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011491767128342227}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.17095111398494645, 
"fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002777030573630023}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09519799112177334, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015659285260077656}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.6922674508727583, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09204471088454393}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_3.json b/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..8065b8e8f24f28000544857408908421314edc08 --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_3.json @@ -0,0 
+1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08698815906142213, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017345476005806628}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.210024658038518, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003645324207351596}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.11970202642407167, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021197440846560966}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.012464843327563693, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006606924610361004}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.03149526320515733, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016746812407298752}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.017445968725527953, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009133145642922374}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07749990811103293, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014590057257022838}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.18830492688774997, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0031305358996651976}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.10682732750074307, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017831957682131314}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06755902721539477, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013628857922879717}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.16409288081988832, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002865541107683648}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09289835920150129, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001624619431174144}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.6181747520447193, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.049332282488453325}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git 
a/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_4.json b/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..de8939c669cb2c0e57bb617974804fa47d3afe9d --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.030544594130750835, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0021330267604427484}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.054628538366303006, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0032412927121272896}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.03470890743725083, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020520465159130927}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.004045481859690613, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", 
"prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005769901773501374}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.008512380598717688, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0010089042236144311}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.005129090360707225, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0006597787970364636}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.026676015773228576, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001870277821695184}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.04805945027625291, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002827982846252903}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.030241620839919433, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017471531374678303}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.024493166051240776, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018426049402895288}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.042743950057792045, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025687027564754382}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.027168700375590663, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016300199179532333}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.450056497825642, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.14846722588344094}], "config": {"model": "hf-causal", "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_5.json b/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..7521c2a4efa74f009f218a15659c80adb2c2e6de --- /dev/null +++ b/146m14b14b/evaluation/generation/agg.146m14b14b_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.002880347249708247, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0008552363057144186}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0023662681113285227, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0006671844202651334}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.002541504826499715, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0007284967864016516}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0003609205260148656, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00016999661105349456}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00029366489965824033, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00014802866526663323}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.0003192230499539746, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0001551634211277905}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.002324954694563953, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.000660753735821071}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0019645608817565226, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0005537267229057413}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.002078487848546464, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00058210203581558}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0024628781194784405, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0007073962748883953}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.002062973958092467, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0005804863416586463}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.002192963200921734, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0006172749787154629}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 4.701645545451422e-39, 
"fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 4.423933320700958e-33}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_0.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fcd05f66840e70a37ea5985a598c3da009405ac1 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba0b739ab0526f3cbd3013326e1362fc5ccc4621e14d110032d04db78191489c +size 4147931 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_1.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0f0fcc9e3aea5434e8615c352ba73da0006aa99d --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c22a93307676fd6492e8ce043b1e42e09ad05a07f9b254a2d333b49efbbc3d37 +size 5123140 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_2.jsonl 
b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b52558268e84cf1edeab5681a67414bb2fa6dff1 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb56c87a6b527f6ea8b24afaed9b169f99f53c165c583d7415c7cf85c6cfdde5 +size 5982496 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_3.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..250106c5f335272fb3ce766e924267b5d83997c0 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5affc414132e6b1be2578af08f55e623c0178c5d2335bca358ab7104893d2b6 +size 6923796 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_4.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3e28109ad53f1073916f88f50c956c256253b0a7 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6f45b221ef4c21942f22ed42f595c243a21b8ac509ebee87b302f424dae07c +size 7814339 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_5.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..15606a59cd24c345e1034bf4a693c53802fd2302 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-web_nlg_en_PALM_prompt_5.jsonl @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6aed17a2f66e24bab2fbb4933fc2dab801e5162bc57a9e1e49465c8868797d +size 8705213 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_0.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3ec526a0574412a72ec9cc08537fce58c0daba44 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8432642b5bb43018c142a0caabaa725d9de37bd759cb24bbdb5986290ae35929 +size 7589460 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_1.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..74c4dbc3da48f7ad036f945f1a9d70b7f37f5717 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e04c387bc7c8add6fe49684e16b394551d43429b4ea26a801a71fdb8ef8ff3 +size 13357920 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_2.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0b005cf338679ef51873ed31ec55229622dbb992 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea7d4baa6a2a113ece169114559348b001a2fbd0b08bd48ab94b0208e41ee936 +size 18952262 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_3.jsonl 
b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2768b74bd524ab5516a36e0bd265af7149adf79b --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7cbb95ef9ef655403db257cbb3ad5af5c1f4a918ed1e414211be2836ae7c635 +size 24342715 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_4.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..01df12265c641238163657431b076740e666abcc --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85e9c5ab8fab308a11ad0cd09700072ec1a0c9f273378c396a8cbec094a775c7 +size 29466721 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_5.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..57c1eb6e2eca85a70eb402843630b3cbca41f944 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_GEM-wiki_lingua_en_tldr_en_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:400ac21f571e0bc6f8b922b563cfd48248ccb98345896480315c930b8a5e9373 +size 34797758 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6ad47188cd3f692225478a38f1c4aa10bb7fb1f4 --- /dev/null +++ 
b/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f4d64cb1b4120d2eef3f3130f3f6dcf665d3df3b511b02504a6f88cc04c841a +size 3801612 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..63c20ed172e44530e9d1f45557f28c70776dd99e --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca7922eb16a8418d87c8f3461122b55f122c41f651cbf04692c1f77e6968d19c +size 5009284 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..94fa4514cddbaaae0f58133520f7087ed51e617a --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcdbd7fe9b93832f08732376599a59bb223c5802bf6b34bff4a6e1969eb3c9b3 +size 5979130 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a7b2c0c2e530539a67618cb6ddd84ab7a78bada1 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ab29b9cc5d226597621c941437f28e0b00d8aec051afe040727dbc964470dfc9 +size 7062135 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..62bfabcd654e97cdd9363df812ba6a98cf709a61 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8a5c68fbf10ee27c8ddc61200f39b279004a6a40e3c69faa01ee7b232507286 +size 8152288 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b1f69ea0a98098890bff43348e267c40e980b2d9 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9528cc10de78178d3dda954a80f38b5d4b35ee54a962d7eb10f6565b32a1148b +size 9249579 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_0.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..eeea55d706fe10702ea2c17c64479cca06be59f5 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f1e871f9c3bae146eabdd5b665348395340843e8eb8e5475d94e435948252d +size 2835114 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_1.jsonl 
b/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6af5b777b8344641db20262d73c55caa7eac1a61 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e2abd11d6d026d75b87f99b44e2fa995b257774fbf733fc59f76a31fbdd42c4 +size 5111571 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_2.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..94a03056917aefbbc3747c0424f4c493a22fc8ac --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0164700e6bc04737337a37a7126595e0cfbed08a82fb8c7f973a47049d7b0a7b +size 7384591 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_3.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..60ad150a4cfcc16368c8b853c45566dacb5d6a64 --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15bc9d4d0bd08eae8c847be6b4088f9414fe9c82a291122e38f558f7541af4c1 +size 9654468 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_4.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0ea9559c0117b0c2c0f853dbf1608d58c3fbfef6 --- /dev/null +++ 
b/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb6c2cf6f4862baeea7b92ceece107ac78eb12d4a3ea4d5cb2e5cb789df9c96 +size 11675335 diff --git a/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_5.jsonl b/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fa7915424778ca079ef2476cb5a3ede44ccf7b5c --- /dev/null +++ b/146m14b14b/evaluation/generation/examples.146m14b14b_gem_xsum_article_DOC_summary_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c857092005ae2b65a24e5554ca060cf84bfef33b0c30c313586937e0535862f4 +size 13897559 diff --git a/146m14b14b/evaluation/generation/merged.csv b/146m14b14b/evaluation/generation/merged.csv new file mode 100644 index 0000000000000000000000000000000000000000..f0fb35beeb772ace76fde15aebe129adfafec99f --- /dev/null +++ b/146m14b14b/evaluation/generation/merged.csv @@ -0,0 +1,53 @@ +dataset,fewshots,prompt,metric,value +e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.010736674908829128 +e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.010736674908829128 +e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.05706123212585479 +e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.05706123212585479 +e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.08456553732934603 +e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.08456553732934603 +e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.09792307241998234 +e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.09792307241998234 +e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.11225233292150326 +e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.11225233292150326 +e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.12437458519230632 +e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.12437458519230632 
+e2e_nlg_cleaned,5,average,multiple,0.08115223914963698 +gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.02440112287257367 +gem_xsum,0,median,rouge2_fmeasure,0.02440112287257367 +gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.018226182677564602 +gem_xsum,1,median,rouge2_fmeasure,0.018226182677564602 +gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.018525660614874147 +gem_xsum,2,median,rouge2_fmeasure,0.018525660614874147 +gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.017445968725527953 +gem_xsum,3,median,rouge2_fmeasure,0.017445968725527953 +gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.005129090360707225 +gem_xsum,4,median,rouge2_fmeasure,0.005129090360707225 +gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.0003192230499539746 +gem_xsum,5,median,rouge2_fmeasure,0.0003192230499539746 +gem_xsum,5,average,multiple,0.014007874716866929 +web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.04138231986575359 +web_nlg_en,0,median,rouge2_fmeasure,0.04138231986575359 +web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.03623174370339374 +web_nlg_en,1,median,rouge2_fmeasure,0.03623174370339374 +web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.035231660164805785 +web_nlg_en,2,median,rouge2_fmeasure,0.035231660164805785 +web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.03498308879465038 +web_nlg_en,3,median,rouge2_fmeasure,0.03498308879465038 +web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.03616407362539412 +web_nlg_en,4,median,rouge2_fmeasure,0.03616407362539412 +web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.03554619045223363 +web_nlg_en,5,median,rouge2_fmeasure,0.03554619045223363 +web_nlg_en,5,average,multiple,0.03658984610103854 +wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.012242312744229137 +wiki_lingua_en,0,median,rouge2_fmeasure,0.012242312744229137 +wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.0073507199277508825 +wiki_lingua_en,1,median,rouge2_fmeasure,0.0073507199277508825 +wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.007001951186011981 +wiki_lingua_en,2,median,rouge2_fmeasure,0.007001951186011981 
+wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.009383113142976338 +wiki_lingua_en,3,median,rouge2_fmeasure,0.009383113142976338 +wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.003984021707078218 +wiki_lingua_en,4,median,rouge2_fmeasure,0.003984021707078218 +wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0009343759708146302 +wiki_lingua_en,5,median,rouge2_fmeasure,0.0009343759708146302 +wiki_lingua_en,5,average,multiple,0.006816082446476864 diff --git a/146m14b14b/evaluation/generation/merged.json b/146m14b14b/evaluation/generation/merged.json new file mode 100644 index 0000000000000000000000000000000000000000..0edd637f4c110755a25817115749876c3600694e --- /dev/null +++ b/146m14b14b/evaluation/generation/merged.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.22883262521963457, "bleu_stderr": 0.027812784355662805, "rouge1_fmeasure": 0.08597342838383007, "rouge1_fmeasure_stderr": 0.0016706133404280645, "rouge1_precision": 0.05586003903362003, "rouge1_precision_stderr": 0.001210122174269387, "rouge1_recall": 0.24242206204772987, "rouge1_recall_stderr": 0.00406311577145891, "rouge2_fmeasure": 0.04138231986575359, "rouge2_fmeasure_stderr": 0.0010877957797931874, "rouge2_precision": 0.02681915167649114, "rouge2_precision_stderr": 0.0007667503131708552, "rouge2_recall": 0.1198429847566052, "rouge2_recall_stderr": 0.002900245290872093, "rougeL_fmeasure": 0.08417220195877628, "rougeL_fmeasure_stderr": 0.0016113500400264057, "rougeL_precision": 0.054558298468825164, "rougeL_precision_stderr": 0.0011547348959483191, "rougeL_recall": 0.23869961861095434, "rougeL_recall_stderr": 0.004019678329876834, "rougeLsum_fmeasure": 0.08303053627222617, "rougeLsum_fmeasure_stderr": 0.0015828450619681193, "rougeLsum_precision": 0.053912329140769884, "rougeLsum_precision_stderr": 0.0011465126361564196, "rougeLsum_recall": 0.23448028224096124, "rougeLsum_recall_stderr": 0.0038290152808916687}}, "1": {"PALM_prompt": {"bleu": 0.15575574742170292, "bleu_stderr": 0.017955848150539994, 
"rouge1_fmeasure": 0.07979284621488447, "rouge1_fmeasure_stderr": 0.0014891690285340458, "rouge1_precision": 0.05111535113081184, "rouge1_precision_stderr": 0.0010569639568656138, "rouge1_recall": 0.24170102692127848, "rouge1_recall_stderr": 0.003887710100922636, "rouge2_fmeasure": 0.03623174370339374, "rouge2_fmeasure_stderr": 0.0009530237429355749, "rouge2_precision": 0.02319539268897493, "rouge2_precision_stderr": 0.0006528037703471886, "rouge2_recall": 0.10947015615742409, "rouge2_recall_stderr": 0.0027377316691087903, "rougeL_fmeasure": 0.0784179897604357, "rougeL_fmeasure_stderr": 0.0014562658859192477, "rougeL_precision": 0.050193570501271496, "rougeL_precision_stderr": 0.001028472018467999, "rougeL_recall": 0.23749645057907356, "rougeL_recall_stderr": 0.0038081908356859396, "rougeLsum_fmeasure": 0.07704351061583765, "rougeLsum_fmeasure_stderr": 0.001426638202203612, "rougeLsum_precision": 0.049369870759019566, "rougeLsum_precision_stderr": 0.0010133278968197307, "rougeLsum_recall": 0.2327791385209823, "rougeLsum_recall_stderr": 0.003684550808421332}}, "2": {"PALM_prompt": {"bleu": 0.11268067835710893, "bleu_stderr": 0.016117537835788783, "rouge1_fmeasure": 0.07825326885860989, "rouge1_fmeasure_stderr": 0.001440978853110425, "rouge1_precision": 0.05051442816287885, "rouge1_precision_stderr": 0.0011508877054317382, "rouge1_recall": 0.23644760012414331, "rouge1_recall_stderr": 0.003674644463876482, "rouge2_fmeasure": 0.035231660164805785, "rouge2_fmeasure_stderr": 0.0009297500296840882, "rouge2_precision": 0.022522751858263527, "rouge2_precision_stderr": 0.0006351580301931131, "rouge2_recall": 0.10704237700401503, "rouge2_recall_stderr": 0.0026405410434178767, "rougeL_fmeasure": 0.07721547440153723, "rougeL_fmeasure_stderr": 0.0014241275115813303, "rougeL_precision": 0.04983511890393805, "rougeL_precision_stderr": 0.0011358261496601876, "rougeL_recall": 0.23226685909234288, "rougeL_recall_stderr": 0.0035796541421674885, "rougeLsum_fmeasure": 
0.07603991744725928, "rougeLsum_fmeasure_stderr": 0.0013988704181332936, "rougeLsum_precision": 0.04910718457688571, "rougeLsum_precision_stderr": 0.0011242886510810662, "rougeLsum_recall": 0.22930984617447991, "rougeLsum_recall_stderr": 0.003533204831294875}}, "3": {"PALM_prompt": {"bleu": 0.10023958520271987, "bleu_stderr": 0.00971716873348663, "rouge1_fmeasure": 0.07829031027709862, "rouge1_fmeasure_stderr": 0.001430784109255991, "rouge1_precision": 0.05067153957719335, "rouge1_precision_stderr": 0.001191666778337723, "rouge1_recall": 0.23959949696648097, "rouge1_recall_stderr": 0.003667056307063945, "rouge2_fmeasure": 0.03498308879465038, "rouge2_fmeasure_stderr": 0.0009183882717060898, "rouge2_precision": 0.02236115249874777, "rouge2_precision_stderr": 0.0006367547266607817, "rouge2_recall": 0.10826670979300317, "rouge2_recall_stderr": 0.002634480071726296, "rougeL_fmeasure": 0.07694344069935595, "rougeL_fmeasure_stderr": 0.0013971914265873296, "rougeL_precision": 0.049726423469511824, "rougeL_precision_stderr": 0.0011482147824323167, "rougeL_recall": 0.235074424902374, "rougeL_recall_stderr": 0.003581208119051607, "rougeLsum_fmeasure": 0.07585440271342676, "rougeLsum_fmeasure_stderr": 0.0013846552471020674, "rougeLsum_precision": 0.04906977784065461, "rougeLsum_precision_stderr": 0.0011457059871568674, "rougeLsum_recall": 0.23202608921361742, "rougeLsum_recall_stderr": 0.0035573781836356197}}, "4": {"PALM_prompt": {"bleu": 0.1250569767383199, "bleu_stderr": 0.021628601699908004, "rouge1_fmeasure": 0.07994546949564424, "rouge1_fmeasure_stderr": 0.0014211193648653705, "rouge1_precision": 0.051227713247576225, "rouge1_precision_stderr": 0.0010795326291589034, "rouge1_recall": 0.24913049586114253, "rouge1_recall_stderr": 0.0037758792838945734, "rouge2_fmeasure": 0.03616407362539412, "rouge2_fmeasure_stderr": 0.0009414194120798864, "rouge2_precision": 0.023147509973142556, "rouge2_precision_stderr": 0.0007025722449267816, "rouge2_recall": 0.11505322393998348, 
"rouge2_recall_stderr": 0.002750803567815038, "rougeL_fmeasure": 0.07875849123282895, "rougeL_fmeasure_stderr": 0.0013979148156232973, "rougeL_precision": 0.050449943064731946, "rougeL_precision_stderr": 0.0010510818215242375, "rougeL_recall": 0.24470086974952332, "rougeL_recall_stderr": 0.0036741476280504953, "rougeLsum_fmeasure": 0.07780048005428908, "rougeLsum_fmeasure_stderr": 0.0013878222359377333, "rougeLsum_precision": 0.04987064991309479, "rougeLsum_precision_stderr": 0.0010470276141344049, "rougeLsum_recall": 0.2417182789397841, "rougeLsum_recall_stderr": 0.0036483215475860884}}, "5": {"PALM_prompt": {"bleu": 0.13005434642499197, "bleu_stderr": 0.010305698888010667, "rouge1_fmeasure": 0.08007155614694793, "rouge1_fmeasure_stderr": 0.0013739824517440447, "rouge1_precision": 0.05083504737121614, "rouge1_precision_stderr": 0.0009791389486581562, "rouge1_recall": 0.2513764601791396, "rouge1_recall_stderr": 0.0038227866407648994, "rouge2_fmeasure": 0.03554619045223363, "rouge2_fmeasure_stderr": 0.0009074930456849103, "rouge2_precision": 0.022503923501631235, "rouge2_precision_stderr": 0.0006219870748423658, "rouge2_recall": 0.11458449377422993, "rouge2_recall_stderr": 0.00275472174018853, "rougeL_fmeasure": 0.07854454523551002, "rougeL_fmeasure_stderr": 0.0013506756461914645, "rougeL_precision": 0.049876244862291454, "rougeL_precision_stderr": 0.0009596753537288436, "rougeL_recall": 0.2453692869402309, "rougeL_recall_stderr": 0.003667684923694637, "rougeLsum_fmeasure": 0.07777039763023255, "rougeLsum_fmeasure_stderr": 0.0013383824495560735, "rougeLsum_precision": 0.04940531544011753, "rougeLsum_precision_stderr": 0.0009556461222745182, "rougeLsum_recall": 0.24369845054346978, "rougeLsum_recall_stderr": 0.0036791477193529676}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 0.49687897813178417, "bleu_stderr": 0.028753272849352945, "rouge1_fmeasure": 0.09912682070533965, "rouge1_fmeasure_stderr": 0.001568429644016599, "rouge1_precision": 0.08918754448453994, 
"rouge1_precision_stderr": 0.0015932292071292517, "rouge1_recall": 0.13575112269881404, "rouge1_recall_stderr": 0.0021807035000954386, "rouge2_fmeasure": 0.012242312744229137, "rouge2_fmeasure_stderr": 0.0005067795444742174, "rouge2_precision": 0.010880211822211733, "rouge2_precision_stderr": 0.0004652941571441064, "rouge2_recall": 0.017760638377981565, "rouge2_recall_stderr": 0.0008339736615674986, "rougeL_fmeasure": 0.08904469475289996, "rougeL_fmeasure_stderr": 0.0013408271016179848, "rougeL_precision": 0.07960812665447552, "rougeL_precision_stderr": 0.0013419368478906097, "rougeL_recall": 0.12335393646166343, "rougeL_recall_stderr": 0.001962960490931355, "rougeLsum_fmeasure": 0.09217605866260052, "rougeLsum_fmeasure_stderr": 0.0014433103845410448, "rougeLsum_precision": 0.08286720839573229, "rougeLsum_precision_stderr": 0.0014710250802756778, "rougeLsum_recall": 0.1265759930387205, "rougeLsum_recall_stderr": 0.0020220996079844036}}, "1": {"tldr_en": {"bleu": 0.4209954239393304, "bleu_stderr": 0.03176221976327906, "rouge1_fmeasure": 0.10341097418229313, "rouge1_fmeasure_stderr": 0.0013506246569402033, "rouge1_precision": 0.09108345639125676, "rouge1_precision_stderr": 0.0013414051332403126, "rouge1_recall": 0.14374521382155062, "rouge1_recall_stderr": 0.0019373235502897549, "rouge2_fmeasure": 0.0073507199277508825, "rouge2_fmeasure_stderr": 0.00038900605048980045, "rouge2_precision": 0.006366424495184133, "rouge2_precision_stderr": 0.00033879797030736587, "rouge2_recall": 0.011055391527813261, "rouge2_recall_stderr": 0.000720932463146685, "rougeL_fmeasure": 0.08213432595813003, "rougeL_fmeasure_stderr": 0.0009442571910773332, "rougeL_precision": 0.07176268588179166, "rougeL_precision_stderr": 0.0009370468184145725, "rougeL_recall": 0.11646644523105884, "rougeL_recall_stderr": 0.0014990807345932596, "rougeLsum_fmeasure": 0.09789009627397521, "rougeLsum_fmeasure_stderr": 0.0012554566404861117, "rougeLsum_precision": 0.0861238381653085, 
"rougeLsum_precision_stderr": 0.0012500535942948073, "rougeLsum_recall": 0.13658818770137882, "rougeLsum_recall_stderr": 0.0018342040331144193}}, "2": {"tldr_en": {"bleu": 0.3662014163092573, "bleu_stderr": 0.03512932309274555, "rouge1_fmeasure": 0.09759598981071996, "rouge1_fmeasure_stderr": 0.0013580795670625858, "rouge1_precision": 0.08962886049037526, "rouge1_precision_stderr": 0.0014557995887673056, "rouge1_recall": 0.13142388510558053, "rouge1_recall_stderr": 0.0018322115687655425, "rouge2_fmeasure": 0.007001951186011981, "rouge2_fmeasure_stderr": 0.0003928300541809073, "rouge2_precision": 0.006459794912288849, "rouge2_precision_stderr": 0.00039736825277010513, "rouge2_recall": 0.009646265425710393, "rouge2_recall_stderr": 0.0005575014671908036, "rougeL_fmeasure": 0.08154716868365836, "rougeL_fmeasure_stderr": 0.0010323345748373103, "rougeL_precision": 0.07428847306130615, "rougeL_precision_stderr": 0.0011113706498795289, "rougeL_recall": 0.1117787565135024, "rougeL_recall_stderr": 0.001507759191681407, "rougeLsum_fmeasure": 0.0915845046326089, "rougeLsum_fmeasure_stderr": 0.001250008944008003, "rougeLsum_precision": 0.08399764483535616, "rougeLsum_precision_stderr": 0.0013459142554786106, "rougeLsum_recall": 0.12372819672301809, "rougeLsum_recall_stderr": 0.001704125865736863}}, "3": {"tldr_en": {"bleu": 0.558567693535546, "bleu_stderr": 0.059328676012531334, "rouge1_fmeasure": 0.087537415147445, "rouge1_fmeasure_stderr": 0.0015354531166868524, "rouge1_precision": 0.08885799809368973, "rouge1_precision_stderr": 0.0019348327165460144, "rouge1_recall": 0.11654077328550566, "rouge1_recall_stderr": 0.0021991705748568326, "rouge2_fmeasure": 0.009383113142976338, "rouge2_fmeasure_stderr": 0.0004958980836726872, "rouge2_precision": 0.009914622438569649, "rouge2_precision_stderr": 0.0006522325398007237, "rouge2_recall": 0.013569391618498709, "rouge2_recall_stderr": 0.0008559625787159471, "rougeL_fmeasure": 0.07425320055271334, "rougeL_fmeasure_stderr": 
0.0012438089865080164, "rougeL_precision": 0.07476162220353817, "rougeL_precision_stderr": 0.0015778653051055946, "rougeL_recall": 0.1005389837409404, "rougeL_recall_stderr": 0.0018902936583710462, "rougeLsum_fmeasure": 0.08158571031468127, "rougeLsum_fmeasure_stderr": 0.0014179098384736939, "rougeLsum_precision": 0.08282976799475586, "rougeLsum_precision_stderr": 0.0018070176003999415, "rougeLsum_recall": 0.10903150651704524, "rougeLsum_recall_stderr": 0.0020555713704425685}}, "4": {"tldr_en": {"bleu": 0.2666315699216447, "bleu_stderr": 0.04602526809695048, "rouge1_fmeasure": 0.03122194580188561, "rouge1_fmeasure_stderr": 0.0012441428977843587, "rouge1_precision": 0.03600497009970023, "rouge1_precision_stderr": 0.0017041914474905083, "rouge1_recall": 0.0405939531186821, "rouge1_recall_stderr": 0.0016845502061290764, "rouge2_fmeasure": 0.003984021707078218, "rouge2_fmeasure_stderr": 0.0003464773884452124, "rouge2_precision": 0.00501859392689466, "rouge2_precision_stderr": 0.000548209402419891, "rouge2_recall": 0.005490853123571033, "rouge2_recall_stderr": 0.0006056703124515839, "rougeL_fmeasure": 0.027162299961073107, "rougeL_fmeasure_stderr": 0.0010492438955382582, "rougeL_precision": 0.03120550445864107, "rougeL_precision_stderr": 0.0014566603437604745, "rougeL_recall": 0.03575355085817033, "rougeL_recall_stderr": 0.0014586794204117094, "rougeLsum_fmeasure": 0.028997162012914524, "rougeLsum_fmeasure_stderr": 0.001157930832064929, "rougeLsum_precision": 0.03366739502631602, "rougeLsum_precision_stderr": 0.0016160090202211065, "rougeLsum_recall": 0.037710615384056095, "rougeLsum_recall_stderr": 0.0015687838454172922}}, "5": {"tldr_en": {"bleu": 7.886206328273586e-06, "bleu_stderr": 1.221361669315733e-05, "rouge1_fmeasure": 0.00535516413631097, "rouge1_fmeasure_stderr": 0.0005989788021678713, "rouge1_precision": 0.006709044851699656, "rouge1_precision_stderr": 0.0008832342566561942, "rouge1_recall": 0.006512638750272697, "rouge1_recall_stderr": 
0.0007563435499462167, "rouge2_fmeasure": 0.0009343759708146302, "rouge2_fmeasure_stderr": 0.00019124522298799218, "rouge2_precision": 0.001281837491890342, "rouge2_precision_stderr": 0.000304608742717695, "rouge2_recall": 0.0011010851742601637, "rouge2_recall_stderr": 0.0002253109954852027, "rougeL_fmeasure": 0.004672160110494701, "rougeL_fmeasure_stderr": 0.0005068592712515271, "rougeL_precision": 0.005724582318965228, "rougeL_precision_stderr": 0.0007159612442775775, "rougeL_recall": 0.00580938066432881, "rougeL_recall_stderr": 0.0006776468535233207, "rougeLsum_fmeasure": 0.0050875569393090715, "rougeLsum_fmeasure_stderr": 0.00057132173191095, "rougeLsum_precision": 0.006363502163931598, "rougeLsum_precision_stderr": 0.0008361966843529074, "rougeLsum_recall": 0.00622891484597148, "rougeLsum_recall_stderr": 0.0007333253589288016}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.8343403861560295, "bleu_stderr": 0.08848890438051935, "rouge1_fmeasure": 0.04331398539056834, "rouge1_fmeasure_stderr": 0.0015510481823475862, "rouge1_precision": 0.04482003180956012, "rouge1_precision_stderr": 0.001997077263135176, "rouge1_recall": 0.05496626728471005, "rouge1_recall_stderr": 0.0019739634808670177, "rouge2_fmeasure": 0.010736674908829128, "rouge2_fmeasure_stderr": 0.0005134871607341201, "rouge2_precision": 0.010968928668382354, "rouge2_precision_stderr": 0.0006660784749433778, "rouge2_recall": 0.013837637563125265, "rouge2_recall_stderr": 0.0006656353732837446, "rougeL_fmeasure": 0.04135106793261327, "rougeL_fmeasure_stderr": 0.0014779152643788263, "rougeL_precision": 0.04082014014811183, "rougeL_precision_stderr": 0.0017013420099803993, "rougeL_recall": 0.05346730136724671, "rougeL_recall_stderr": 0.0019401768471861783, "rougeLsum_fmeasure": 0.03267655491671789, "rougeLsum_fmeasure_stderr": 0.0011867994802981648, "rougeLsum_precision": 0.03393512996088468, "rougeLsum_precision_stderr": 0.001554141532768072, "rougeLsum_recall": 0.041493274650743935, 
"rougeLsum_recall_stderr": 0.0015131168604173573}}, "1": {"generate_text_restaurant": {"bleu": 2.9406196061775116, "bleu_stderr": 0.10258259522175316, "rouge1_fmeasure": 0.18693446462920227, "rouge1_fmeasure_stderr": 0.002787464540486619, "rouge1_precision": 0.2787618289181687, "rouge1_precision_stderr": 0.005384508455643661, "rouge1_recall": 0.19799693389239711, "rouge1_recall_stderr": 0.003182542201890721, "rouge2_fmeasure": 0.05706123212585479, "rouge2_fmeasure_stderr": 0.0013015405321589112, "rouge2_precision": 0.1437328248773931, "rouge2_precision_stderr": 0.005092642043712837, "rouge2_recall": 0.05733865153595204, "rouge2_recall_stderr": 0.001383897371483806, "rougeL_fmeasure": 0.14444789684107232, "rougeL_fmeasure_stderr": 0.0021086022369478444, "rougeL_precision": 0.2302703496004102, "rougeL_precision_stderr": 0.005102072689082256, "rougeL_recall": 0.15449193654318977, "rougeL_recall_stderr": 0.002518846268029969, "rougeLsum_fmeasure": 0.1553009809575637, "rougeLsum_fmeasure_stderr": 0.0023739934749666684, "rougeLsum_precision": 0.24427645994280608, "rougeLsum_precision_stderr": 0.005221824942580713, "rougeLsum_recall": 0.1627325478139627, "rougeLsum_recall_stderr": 0.0026692987289094598}}, "2": {"generate_text_restaurant": {"bleu": 3.0869989165891227, "bleu_stderr": 0.16013219623315433, "rouge1_fmeasure": 0.21814903833369859, "rouge1_fmeasure_stderr": 0.0031866040091908464, "rouge1_precision": 0.3978056637157276, "rouge1_precision_stderr": 0.006383333506870748, "rouge1_recall": 0.19769999489666457, "rouge1_recall_stderr": 0.003338189945298019, "rouge2_fmeasure": 0.08456553732934603, "rouge2_fmeasure_stderr": 0.0016022980395205303, "rouge2_precision": 0.24931076985350456, "rouge2_precision_stderr": 0.006415148520407114, "rouge2_recall": 0.07401280693073875, "rouge2_recall_stderr": 0.0016105308975029838, "rougeL_fmeasure": 0.1673148071155831, "rougeL_fmeasure_stderr": 0.002388385786508644, "rougeL_precision": 0.33763473031412045, "rougeL_precision_stderr": 
0.006269179792430952, "rougeL_recall": 0.14877296254487207, "rougeL_recall_stderr": 0.0024783679014017384, "rougeLsum_fmeasure": 0.18463628783286154, "rougeLsum_fmeasure_stderr": 0.002693460028052387, "rougeLsum_precision": 0.357819032438701, "rougeLsum_precision_stderr": 0.006300016081780083, "rougeLsum_recall": 0.16548809371217418, "rougeLsum_recall_stderr": 0.0028048620055066233}}, "3": {"generate_text_restaurant": {"bleu": 3.7011916246059053, "bleu_stderr": 0.14593162958819425, "rouge1_fmeasure": 0.23033091532940253, "rouge1_fmeasure_stderr": 0.0032924392197765163, "rouge1_precision": 0.41822296870814335, "rouge1_precision_stderr": 0.006460520453011632, "rouge1_recall": 0.2071298534116268, "rouge1_recall_stderr": 0.0034383519555611984, "rouge2_fmeasure": 0.09792307241998234, "rouge2_fmeasure_stderr": 0.0017582063387383436, "rouge2_precision": 0.2702971908568115, "rouge2_precision_stderr": 0.00643370737283098, "rouge2_recall": 0.08487129021823478, "rouge2_recall_stderr": 0.00173547236263484, "rougeL_fmeasure": 0.17881874820814597, "rougeL_fmeasure_stderr": 0.0024988437464002213, "rougeL_precision": 0.3572174834274431, "rougeL_precision_stderr": 0.006354476895716739, "rougeL_recall": 0.1574631215849211, "rougeL_recall_stderr": 0.002552084505977488, "rougeLsum_fmeasure": 0.19693757021044234, "rougeLsum_fmeasure_stderr": 0.002816228141853166, "rougeLsum_precision": 0.37863570607432673, "rougeLsum_precision_stderr": 0.006392889923077396, "rougeLsum_recall": 0.17478692469025933, "rougeLsum_recall_stderr": 0.002894063899840589}}, "4": {"generate_text_restaurant": {"bleu": 4.378358579277868, "bleu_stderr": 0.15272962158453546, "rouge1_fmeasure": 0.25232175927064066, "rouge1_fmeasure_stderr": 0.003252006309486809, "rouge1_precision": 0.45882713034371453, "rouge1_precision_stderr": 0.006415593061369954, "rouge1_recall": 0.2255093763170198, "rouge1_recall_stderr": 0.003436954476710523, "rouge2_fmeasure": 0.11225233292150326, "rouge2_fmeasure_stderr": 
0.0018233487430859975, "rouge2_precision": 0.299477006298075, "rouge2_precision_stderr": 0.0065384661124707425, "rouge2_recall": 0.09678212220073933, "rouge2_recall_stderr": 0.0018066974497821132, "rougeL_fmeasure": 0.19796718724270523, "rougeL_fmeasure_stderr": 0.002503553099442586, "rougeL_precision": 0.39417067582169574, "rougeL_precision_stderr": 0.006410266792021505, "rougeL_recall": 0.17324797729857383, "rougeL_recall_stderr": 0.002580532804445032, "rougeLsum_fmeasure": 0.2170954167942643, "rougeLsum_fmeasure_stderr": 0.002802720816613775, "rougeLsum_precision": 0.41668133719539296, "rougeLsum_precision_stderr": 0.006408889836300579, "rougeLsum_recall": 0.19157341143212034, "rougeLsum_recall_stderr": 0.002922660612765926}}, "5": {"generate_text_restaurant": {"bleu": 4.890257220178811, "bleu_stderr": 0.13420785403183202, "rouge1_fmeasure": 0.2681436632992601, "rouge1_fmeasure_stderr": 0.003258641900873387, "rouge1_precision": 0.475398003113497, "rouge1_precision_stderr": 0.006327908810294356, "rouge1_recall": 0.24024724261621913, "rouge1_recall_stderr": 0.0034704603090637783, "rouge2_fmeasure": 0.12437458519230632, "rouge2_fmeasure_stderr": 0.0018508476727143834, "rouge2_precision": 0.31255952474835047, "rouge2_precision_stderr": 0.006463307595299056, "rouge2_recall": 0.10772173878741007, "rouge2_recall_stderr": 0.0018408748834838522, "rougeL_fmeasure": 0.21086184259770677, "rougeL_fmeasure_stderr": 0.002497969164716478, "rougeL_precision": 0.40683490044030046, "rougeL_precision_stderr": 0.006313377793533003, "rougeL_recall": 0.18544055623635025, "rougeL_recall_stderr": 0.0026168715932805803, "rougeLsum_fmeasure": 0.23024299776500148, "rougeLsum_fmeasure_stderr": 0.0027826284448924806, "rougeLsum_precision": 0.4295272827032871, "rougeLsum_precision_stderr": 0.006304887170894177, "rougeLsum_recall": 0.20394125936151544, "rougeLsum_recall_stderr": 0.0029282150904557346}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 0.9016790167616279, "bleu_stderr": 
0.08554255656260222, "rouge1_fmeasure": 0.14196393966710072, "rouge1_fmeasure_stderr": 0.002104608543541609, "rouge1_precision": 0.1022798973400133, "rouge1_precision_stderr": 0.0016195028484036645, "rouge1_recall": 0.2472257110564639, "rouge1_recall_stderr": 0.0036244015531403696, "rouge2_fmeasure": 0.02440112287257367, "rouge2_fmeasure_stderr": 0.0010880062384939326, "rouge2_precision": 0.01730961638211292, "rouge2_precision_stderr": 0.0007982969706574101, "rouge2_recall": 0.043897826329439435, "rouge2_recall_stderr": 0.0019301619560314596, "rougeL_fmeasure": 0.12383517753988749, "rougeL_fmeasure_stderr": 0.0017149719617583993, "rougeL_precision": 0.08904777578480996, "rougeL_precision_stderr": 0.0013272362515087367, "rougeL_recall": 0.2169745636685914, "rougeL_recall_stderr": 0.003060682824750885, "rougeLsum_fmeasure": 0.10898759142531142, "rougeLsum_fmeasure_stderr": 0.001653332454666073, "rougeLsum_precision": 0.0783856177341214, "rougeLsum_precision_stderr": 0.0012799850206698408, "rougeLsum_recall": 0.1915896888630931, "rougeLsum_recall_stderr": 0.0029773166596938527}}, "1": {"article_DOC_summary": {"bleu": 0.6174225365598259, "bleu_stderr": 0.05127643687143349, "rouge1_fmeasure": 0.12680695998689423, "rouge1_fmeasure_stderr": 0.001991458093221162, "rouge1_precision": 0.08983663259289545, "rouge1_precision_stderr": 0.0014656640724220792, "rouge1_recall": 0.22526078131503421, "rouge1_recall_stderr": 0.0034677534855223576, "rouge2_fmeasure": 0.018226182677564602, "rouge2_fmeasure_stderr": 0.0008914321799827453, "rouge2_precision": 0.012790758078394766, "rouge2_precision_stderr": 0.0006274060760347892, "rouge2_recall": 0.03314630221773423, "rouge2_recall_stderr": 0.0016600656887166795, "rougeL_fmeasure": 0.11108456989593447, "rougeL_fmeasure_stderr": 0.0016654771979817452, "rougeL_precision": 0.07859662487412902, "rougeL_precision_stderr": 0.0012275599440022234, "rougeL_recall": 0.19823099261868493, "rougeL_recall_stderr": 0.002945509989290099, 
"rougeLsum_fmeasure": 0.09809900796895703, "rougeLsum_fmeasure_stderr": 0.0015505955006710195, "rougeLsum_precision": 0.06928351933199965, "rougeLsum_precision_stderr": 0.0011286042016073875, "rougeLsum_recall": 0.1759900951750178, "rougeLsum_recall_stderr": 0.002828460597285845}}, "2": {"article_DOC_summary": {"bleu": 0.6922674508727583, "bleu_stderr": 0.09204471088454393, "rouge1_fmeasure": 0.12387534340787068, "rouge1_fmeasure_stderr": 0.002014018603012206, "rouge1_precision": 0.08766778881774849, "rouge1_precision_stderr": 0.0014891572798897187, "rouge1_recall": 0.22075516476448948, "rouge1_recall_stderr": 0.00347829704174308, "rouge2_fmeasure": 0.018525660614874147, "rouge2_fmeasure_stderr": 0.0010059837278380754, "rouge2_precision": 0.013027956332330589, "rouge2_precision_stderr": 0.000726229100756286, "rouge2_recall": 0.03360633434332384, "rouge2_recall_stderr": 0.0017823718475256693, "rougeL_fmeasure": 0.11034857045454226, "rougeL_fmeasure_stderr": 0.001695778207988071, "rougeL_precision": 0.077990231219861, "rougeL_precision_stderr": 0.0012548203942130344, "rougeL_recall": 0.19754633749547196, "rougeL_recall_stderr": 0.0029825381740120733, "rougeLsum_fmeasure": 0.09519799112177334, "rougeLsum_fmeasure_stderr": 0.0015659285260077656, "rougeLsum_precision": 0.06719420386174171, "rougeLsum_precision_stderr": 0.0011491767128342227, "rougeLsum_recall": 0.17095111398494645, "rougeLsum_recall_stderr": 0.002777030573630023}}, "3": {"article_DOC_summary": {"bleu": 0.6181747520447193, "bleu_stderr": 0.049332282488453325, "rouge1_fmeasure": 0.11970202642407167, "rouge1_fmeasure_stderr": 0.0021197440846560966, "rouge1_precision": 0.08698815906142213, "rouge1_precision_stderr": 0.0017345476005806628, "rouge1_recall": 0.210024658038518, "rouge1_recall_stderr": 0.003645324207351596, "rouge2_fmeasure": 0.017445968725527953, "rouge2_fmeasure_stderr": 0.0009133145642922374, "rouge2_precision": 0.012464843327563693, "rouge2_precision_stderr": 0.0006606924610361004, 
"rouge2_recall": 0.03149526320515733, "rouge2_recall_stderr": 0.0016746812407298752, "rougeL_fmeasure": 0.10682732750074307, "rougeL_fmeasure_stderr": 0.0017831957682131314, "rougeL_precision": 0.07749990811103293, "rougeL_precision_stderr": 0.0014590057257022838, "rougeL_recall": 0.18830492688774997, "rougeL_recall_stderr": 0.0031305358996651976, "rougeLsum_fmeasure": 0.09289835920150129, "rougeLsum_fmeasure_stderr": 0.001624619431174144, "rougeLsum_precision": 0.06755902721539477, "rougeLsum_precision_stderr": 0.0013628857922879717, "rougeLsum_recall": 0.16409288081988832, "rougeLsum_recall_stderr": 0.002865541107683648}}, "4": {"article_DOC_summary": {"bleu": 0.450056497825642, "bleu_stderr": 0.14846722588344094, "rouge1_fmeasure": 0.03470890743725083, "rouge1_fmeasure_stderr": 0.0020520465159130927, "rouge1_precision": 0.030544594130750835, "rouge1_precision_stderr": 0.0021330267604427484, "rouge1_recall": 0.054628538366303006, "rouge1_recall_stderr": 0.0032412927121272896, "rouge2_fmeasure": 0.005129090360707225, "rouge2_fmeasure_stderr": 0.0006597787970364636, "rouge2_precision": 0.004045481859690613, "rouge2_precision_stderr": 0.0005769901773501374, "rouge2_recall": 0.008512380598717688, "rouge2_recall_stderr": 0.0010089042236144311, "rougeL_fmeasure": 0.030241620839919433, "rougeL_fmeasure_stderr": 0.0017471531374678303, "rougeL_precision": 0.026676015773228576, "rougeL_precision_stderr": 0.001870277821695184, "rougeL_recall": 0.04805945027625291, "rougeL_recall_stderr": 0.002827982846252903, "rougeLsum_fmeasure": 0.027168700375590663, "rougeLsum_fmeasure_stderr": 0.0016300199179532333, "rougeLsum_precision": 0.024493166051240776, "rougeLsum_precision_stderr": 0.0018426049402895288, "rougeLsum_recall": 0.042743950057792045, "rougeLsum_recall_stderr": 0.0025687027564754382}}, "5": {"article_DOC_summary": {"bleu": 4.701645545451422e-39, "bleu_stderr": 4.423933320700958e-33, "rouge1_fmeasure": 0.002541504826499715, "rouge1_fmeasure_stderr": 
0.0007284967864016516, "rouge1_precision": 0.002880347249708247, "rouge1_precision_stderr": 0.0008552363057144186, "rouge1_recall": 0.0023662681113285227, "rouge1_recall_stderr": 0.0006671844202651334, "rouge2_fmeasure": 0.0003192230499539746, "rouge2_fmeasure_stderr": 0.0001551634211277905, "rouge2_precision": 0.0003609205260148656, "rouge2_precision_stderr": 0.00016999661105349456, "rouge2_recall": 0.00029366489965824033, "rouge2_recall_stderr": 0.00014802866526663323, "rougeL_fmeasure": 0.002078487848546464, "rougeL_fmeasure_stderr": 0.00058210203581558, "rougeL_precision": 0.002324954694563953, "rougeL_precision_stderr": 0.000660753735821071, "rougeL_recall": 0.0019645608817565226, "rougeL_recall_stderr": 0.0005537267229057413, "rougeLsum_fmeasure": 0.002192963200921734, "rougeLsum_fmeasure_stderr": 0.0006172749787154629, "rougeLsum_precision": 0.0024628781194784405, "rougeLsum_precision_stderr": 0.0007073962748883953, "rougeLsum_recall": 0.002062973958092467, "rougeLsum_recall_stderr": 0.0005804863416586463}}}} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_0.json b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..0462e8a98aa93cec8e9908f45a551a66c1d7171d --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.22883262521963457, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.027812784355662805 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05586003903362003, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001210122174269387 + }, + { + "task_name": "GEM/web_nlg_en", + 
"prompt_name": "PALM_prompt", + "rouge1_recall": 0.24242206204772987, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.00406311577145891 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08597342838383007, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016706133404280645 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.02681915167649114, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007667503131708552 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.1198429847566052, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002900245290872093 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.04138231986575359, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0010877957797931874 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.054558298468825164, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011547348959483191 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.23869961861095434, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004019678329876834 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08417220195877628, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0016113500400264057 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.053912329140769884, + 
"dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0011465126361564196 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.23448028224096124, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0038290152808916687 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08303053627222617, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015828450619681193 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_1.json b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..cd26b76fa8b2508749a3846b39cb00b7e372020c --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.15575574742170292, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.017955848150539994 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05111535113081184, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 
0.0010569639568656138 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.24170102692127848, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003887710100922636 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07979284621488447, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014891690285340458 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.02319539268897493, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006528037703471886 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10947015615742409, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0027377316691087903 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03623174370339374, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009530237429355749 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.050193570501271496, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001028472018467999 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.23749645057907356, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0038081908356859396 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.0784179897604357, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014562658859192477 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": 
"PALM_prompt", + "rougeLsum_precision": 0.049369870759019566, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010133278968197307 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.2327791385209823, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003684550808421332 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07704351061583765, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001426638202203612 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_2.json b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..a0355db414618402967453486a5142a204427191 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.11268067835710893, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.016117537835788783 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05051442816287885, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + 
"subset": null, + "rouge1_precision_stderr": 0.0011508877054317382 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.23644760012414331, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003674644463876482 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07825326885860989, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001440978853110425 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.022522751858263527, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006351580301931131 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10704237700401503, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0026405410434178767 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.035231660164805785, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009297500296840882 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.04983511890393805, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011358261496601876 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.23226685909234288, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0035796541421674885 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07721547440153723, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014241275115813303 + }, + { + 
"task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04910718457688571, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0011242886510810662 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.22930984617447991, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003533204831294875 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07603991744725928, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013988704181332936 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_3.json b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..a3c29e18bf30a37e3ce0e9222f1ccbae633837bb --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.10023958520271987, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.00971716873348663 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05067153957719335, + 
"dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001191666778337723 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.23959949696648097, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003667056307063945 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07829031027709862, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001430784109255991 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.02236115249874777, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006367547266607817 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10826670979300317, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002634480071726296 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03498308879465038, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009183882717060898 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.049726423469511824, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011482147824323167 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.235074424902374, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003581208119051607 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07694344069935595, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + 
"rougeL_fmeasure_stderr": 0.0013971914265873296 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04906977784065461, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0011457059871568674 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.23202608921361742, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0035573781836356197 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07585440271342676, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013846552471020674 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_4.json b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..45c8cdbf49fd6db82356099ca9987b420d941bdb --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.1250569767383199, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.021628601699908004 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": 
"PALM_prompt", + "rouge1_precision": 0.051227713247576225, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0010795326291589034 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.24913049586114253, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0037758792838945734 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07994546949564424, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014211193648653705 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.023147509973142556, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007025722449267816 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.11505322393998348, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002750803567815038 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03616407362539412, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009414194120798864 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.050449943064731946, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010510818215242375 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.24470086974952332, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0036741476280504953 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07875849123282895, + "dataset_path": 
"GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0013979148156232973 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04987064991309479, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010470276141344049 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.2417182789397841, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036483215475860884 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07780048005428908, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013878222359377333 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_5.json b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..0d3e595bad1277a487f76bf1905fcdb08d8771e0 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.13005434642499197, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.010305698888010667 + }, + { + 
"task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05083504737121614, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0009791389486581562 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.2513764601791396, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0038227866407648994 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08007155614694793, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0013739824517440447 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.022503923501631235, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006219870748423658 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.11458449377422993, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00275472174018853 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03554619045223363, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009074930456849103 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.049876244862291454, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0009596753537288436 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.2453692869402309, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003667684923694637 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + 
"rougeL_fmeasure": 0.07854454523551002, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0013506756461914645 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04940531544011753, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0009556461222745182 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.24369845054346978, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036791477193529676 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07777039763023255, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013383824495560735 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_0.json b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..8797efd04e3a26bad0f2fc8555c3c7e89a66703c --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.08918754448453994, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": 
"en", + "subset": null, + "rouge1_precision_stderr": 0.0015932292071292517 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.13575112269881404, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0021807035000954386 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.09912682070533965, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001568429644016599 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.010880211822211733, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0004652941571441064 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.017760638377981565, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0008339736615674986 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.012242312744229137, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005067795444742174 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.07960812665447552, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0013419368478906097 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.12335393646166343, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001962960490931355 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08904469475289996, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + 
"rougeL_fmeasure_stderr": 0.0013408271016179848 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.08286720839573229, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014710250802756778 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.1265759930387205, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0020220996079844036 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.09217605866260052, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014433103845410448 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.49687897813178417, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.028753272849352945 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_1.json b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..60a19ddd3054913bf3f072cbf4953382800c5c46 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + 
"prompt_name": "tldr_en", + "rouge1_precision": 0.09108345639125676, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0013414051332403126 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.14374521382155062, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0019373235502897549 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.10341097418229313, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0013506246569402033 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.006366424495184133, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00033879797030736587 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.011055391527813261, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.000720932463146685 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.0073507199277508825, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00038900605048980045 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.07176268588179166, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0009370468184145725 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.11646644523105884, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0014990807345932596 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + 
"rougeL_fmeasure": 0.08213432595813003, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0009442571910773332 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.0861238381653085, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0012500535942948073 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.13658818770137882, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0018342040331144193 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.09789009627397521, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0012554566404861117 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.4209954239393304, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.03176221976327906 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_2.json b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..b4af2b19e6f7474bd3ce11dc2a0364f260e9e378 --- /dev/null +++ 
b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.08962886049037526, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0014557995887673056 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.13142388510558053, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0018322115687655425 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.09759598981071996, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0013580795670625858 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.006459794912288849, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00039736825277010513 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.009646265425710393, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0005575014671908036 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.007001951186011981, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0003928300541809073 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.07428847306130615, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011113706498795289 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.1117787565135024, + "dataset_path": "GEM/wiki_lingua", + 
"dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001507759191681407 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08154716868365836, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0010323345748373103 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.08399764483535616, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0013459142554786106 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.12372819672301809, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.001704125865736863 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.0915845046326089, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001250008944008003 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.3662014163092573, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.03512932309274555 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_3.json b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_3.json new file mode 
100644 index 0000000000000000000000000000000000000000..08d5597a93da3f56a9d64b06036c1686371d8862 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.08885799809368973, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0019348327165460144 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.11654077328550566, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0021991705748568326 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.087537415147445, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015354531166868524 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.009914622438569649, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006522325398007237 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.013569391618498709, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0008559625787159471 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.009383113142976338, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0004958980836726872 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.07476162220353817, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0015778653051055946 + }, + { + "task_name": "GEM/wiki_lingua_en", + 
"prompt_name": "tldr_en", + "rougeL_recall": 0.1005389837409404, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0018902936583710462 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.07425320055271334, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0012438089865080164 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.08282976799475586, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0018070176003999415 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.10903150651704524, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0020555713704425685 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.08158571031468127, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014179098384736939 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.558567693535546, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.059328676012531334 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git 
a/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_4.json b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..195606872938641594e4788c3f18a2dd93008671 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.03600497009970023, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0017041914474905083 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.0405939531186821, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0016845502061290764 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.03122194580188561, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0012441428977843587 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.00501859392689466, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.000548209402419891 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.005490853123571033, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0006056703124515839 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.003984021707078218, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0003464773884452124 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.03120550445864107, + 
"dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0014566603437604745 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.03575355085817033, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0014586794204117094 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.027162299961073107, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0010492438955382582 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.03366739502631602, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0016160090202211065 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.037710615384056095, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0015687838454172922 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.028997162012914524, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001157930832064929 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.2666315699216447, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.04602526809695048 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + 
"limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_5.json b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..9069cd7c9da70746888b8e8ae092e64e640b7ee8 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.006709044851699656, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0008832342566561942 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.006512638750272697, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0007563435499462167 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.00535516413631097, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0005989788021678713 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.001281837491890342, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.000304608742717695 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.0011010851742601637, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0002253109954852027 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.0009343759708146302, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00019124522298799218 + }, + { + 
"task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.005724582318965228, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0007159612442775775 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.00580938066432881, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0006776468535233207 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.004672160110494701, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0005068592712515271 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.006363502163931598, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0008361966843529074 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.00622891484597148, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0007333253589288016 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.0050875569393090715, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.00057132173191095 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 7.886206328273586e-06, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 1.221361669315733e-05 + } + ], + "config": { + "model": "hf-causal", + "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_0.json b/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..54ff944412ce4cb8313c2b6ccc76ada9c2694f44 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 0.8343403861560295, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.08848890438051935 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.04482003180956012, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.001997077263135176 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.05496626728471005, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0019739634808670177 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.04331398539056834, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0015510481823475862 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", 
+ "rouge2_precision": 0.010968928668382354, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0006660784749433778 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.013837637563125265, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0006656353732837446 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.010736674908829128, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0005134871607341201 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.04082014014811183, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0017013420099803993 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.05346730136724671, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019401768471861783 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.04135106793261327, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0014779152643788263 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.03393512996088468, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.001554141532768072 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.041493274650743935, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0015131168604173573 + }, + 
{ + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.03267655491671789, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0011867994802981648 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_1.json b/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..bc7b5235e12416f54470026dcd1f5a4f53de4e80 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 2.9406196061775116, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.10258259522175316 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.2787618289181687, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.005384508455643661 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.19799693389239711, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.003182542201890721 + }, + { + "task_name": 
"e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.18693446462920227, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002787464540486619 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.1437328248773931, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.005092642043712837 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.05733865153595204, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.001383897371483806 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.05706123212585479, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0013015405321589112 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.2302703496004102, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.005102072689082256 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.15449193654318977, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002518846268029969 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.14444789684107232, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0021086022369478444 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.24427645994280608, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + 
"rougeLsum_precision_stderr": 0.005221824942580713 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.1627325478139627, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0026692987289094598 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.1553009809575637, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0023739934749666684 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_2.json b/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..3e68011602d8c63f05d967906dc9d572b3a7fd3e --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 3.0869989165891227, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.16013219623315433 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.3978056637157276, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + 
"rouge1_precision_stderr": 0.006383333506870748 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.19769999489666457, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.003338189945298019 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.21814903833369859, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0031866040091908464 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.24931076985350456, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.006415148520407114 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.07401280693073875, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016105308975029838 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.08456553732934603, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016022980395205303 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.33763473031412045, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.006269179792430952 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.14877296254487207, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0024783679014017384 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.1673148071155831, + "dataset_path": 
"e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.002388385786508644 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.357819032438701, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.006300016081780083 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.16548809371217418, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0028048620055066233 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.18463628783286154, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002693460028052387 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_3.json b/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..31e409a448de05a19cf49005b96da0057c3bd1e9 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 3.7011916246059053, + 
"dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.14593162958819425 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.41822296870814335, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.006460520453011632 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.2071298534116268, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0034383519555611984 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.23033091532940253, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0032924392197765163 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.2702971908568115, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.00643370737283098 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.08487129021823478, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.00173547236263484 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.09792307241998234, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0017582063387383436 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.3572174834274431, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.006354476895716739 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": 
"generate_text_restaurant", + "rougeL_recall": 0.1574631215849211, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002552084505977488 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.17881874820814597, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0024988437464002213 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.37863570607432673, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.006392889923077396 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.17478692469025933, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002894063899840589 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.19693757021044234, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002816228141853166 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_4.json b/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 
0000000000000000000000000000000000000000..e080d0c24458ea93a36fd0a1ca3d39644accddfe --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 4.378358579277868, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.15272962158453546 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.45882713034371453, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.006415593061369954 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.2255093763170198, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.003436954476710523 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.25232175927064066, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.003252006309486809 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.299477006298075, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0065384661124707425 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.09678212220073933, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0018066974497821132 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.11225233292150326, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 
0.0018233487430859975 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.39417067582169574, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.006410266792021505 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.17324797729857383, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002580532804445032 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.19796718724270523, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.002503553099442586 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.41668133719539296, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.006408889836300579 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.19157341143212034, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002922660612765926 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.2170954167942643, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002802720816613775 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + 
"limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_5.json b/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..b5bea316c858f8c6ff29d7f3c40687d04acc7fb9 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 4.890257220178811, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.13420785403183202 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.475398003113497, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.006327908810294356 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.24024724261621913, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0034704603090637783 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.2681436632992601, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.003258641900873387 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.31255952474835047, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.006463307595299056 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.10772173878741007, + "dataset_path": "e2e_nlg_cleaned", + 
"dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0018408748834838522 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.12437458519230632, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0018508476727143834 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.40683490044030046, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.006313377793533003 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.18544055623635025, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0026168715932805803 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.21086184259770677, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.002497969164716478 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.4295272827032871, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.006304887170894177 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.20394125936151544, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0029282150904557346 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.23024299776500148, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0027826284448924806 + } + ], + "config": { + "model": "hf-causal", + "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_0.json b/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..c5ebe0732aa71597c8ade5dd2adf48d53b4c7719 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.1022798973400133, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0016195028484036645 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.2472257110564639, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0036244015531403696 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.14196393966710072, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002104608543541609 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.01730961638211292, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007982969706574101 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.043897826329439435, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": 
"", + "rouge2_recall_stderr": 0.0019301619560314596 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.02440112287257367, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0010880062384939326 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.08904777578480996, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0013272362515087367 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.2169745636685914, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.003060682824750885 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.12383517753988749, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017149719617583993 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.0783856177341214, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0012799850206698408 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.1915896888630931, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0029773166596938527 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.10898759142531142, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.001653332454666073 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.9016790167616279, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.08554255656260222 + } + ], + "config": { + "model": "hf-causal", + 
"model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_1.json b/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..1d61e262c4829582bac5d778e50f93a2ef046af4 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08983663259289545, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0014656640724220792 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.22526078131503421, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0034677534855223576 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.12680695998689423, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.001991458093221162 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.012790758078394766, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006274060760347892 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.03314630221773423, + "dataset_path": "GEM/xsum", + "dataset_name": 
null, + "subset": "", + "rouge2_recall_stderr": 0.0016600656887166795 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.018226182677564602, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0008914321799827453 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07859662487412902, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012275599440022234 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.19823099261868493, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002945509989290099 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.11108456989593447, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0016654771979817452 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06928351933199965, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0011286042016073875 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.1759900951750178, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002828460597285845 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09809900796895703, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015505955006710195 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.6174225365598259, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.05127643687143349 + } + ], + "config": { + "model": 
"hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_2.json b/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..36ad6c352e666178cf07b1de08ba7f7377d5ee18 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08766778881774849, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0014891572798897187 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.22075516476448948, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.00347829704174308 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.12387534340787068, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002014018603012206 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.013027956332330589, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.000726229100756286 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.03360633434332384, + "dataset_path": "GEM/xsum", + 
"dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0017823718475256693 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.018525660614874147, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0010059837278380754 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.077990231219861, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012548203942130344 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.19754633749547196, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0029825381740120733 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.11034857045454226, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001695778207988071 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06719420386174171, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0011491767128342227 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.17095111398494645, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002777030573630023 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09519799112177334, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015659285260077656 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.6922674508727583, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.09204471088454393 + } + ], + "config": { 
+ "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_3.json b/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..25d40c056fe0ab1eb1214a148a8834a54dde4c14 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08698815906142213, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0017345476005806628 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.210024658038518, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003645324207351596 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.11970202642407167, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0021197440846560966 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.012464843327563693, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006606924610361004 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.03149526320515733, + "dataset_path": 
"GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0016746812407298752 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.017445968725527953, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0009133145642922374 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07749990811103293, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0014590057257022838 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.18830492688774997, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0031305358996651976 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.10682732750074307, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017831957682131314 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06755902721539477, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0013628857922879717 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.16409288081988832, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002865541107683648 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09289835920150129, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.001624619431174144 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.6181747520447193, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.049332282488453325 + } + 
], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_4.json b/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..0d4b2dd1c05bd1b0e90f8d33300ef610bc1b2032 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.030544594130750835, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0021330267604427484 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.054628538366303006, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0032412927121272896 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.03470890743725083, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0020520465159130927 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.004045481859690613, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0005769901773501374 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.008512380598717688, + 
"dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0010089042236144311 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.005129090360707225, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0006597787970364636 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.026676015773228576, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.001870277821695184 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.04805945027625291, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002827982846252903 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.030241620839919433, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017471531374678303 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.024493166051240776, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0018426049402895288 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.042743950057792045, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0025687027564754382 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.027168700375590663, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0016300199179532333 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.450056497825642, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 
0.14846722588344094 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_5.json b/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..8d3db0c2fb8c15ea51b386ec211234033cf5cd99 --- /dev/null +++ b/146m14b14b/evaluation/generation/slim.146m14b14b_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.002880347249708247, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0008552363057144186 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.0023662681113285227, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0006671844202651334 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.002541504826499715, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0007284967864016516 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.0003609205260148656, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.00016999661105349456 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + 
"rouge2_recall": 0.00029366489965824033, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.00014802866526663323 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.0003192230499539746, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0001551634211277905 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.002324954694563953, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.000660753735821071 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.0019645608817565226, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0005537267229057413 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.002078487848546464, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.00058210203581558 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.0024628781194784405, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0007073962748883953 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.002062973958092467, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0005804863416586463 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.002192963200921734, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0006172749787154629 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 4.701645545451422e-39, + "dataset_path": "GEM/xsum", + 
"dataset_name": null, + "subset": "", + "bleu_stderr": 4.423933320700958e-33 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b14b/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/rankeval/146m14b14b_0.csv b/146m14b14b/evaluation/rankeval/146m14b14b_0.csv new file mode 100644 index 0000000000000000000000000000000000000000..c455d285ec47673c8337ba1acc1831c27b3069fa --- /dev/null +++ b/146m14b14b/evaluation/rankeval/146m14b14b_0.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.328,0.014853842487270333,0 +anli_r2,acc,0.325,0.014818724459095527,0 +anli_r3,acc,0.34,0.013680495725767789,0 +arc_challenge,acc,0.18430034129692832,0.011330517933037411,0 +arc_challenge,acc_norm,0.23122866894197952,0.012320858834772278,0 +arc_easy,acc,0.4065656565656566,0.010079056419223503,0 +arc_easy,acc_norm,0.36952861952861954,0.009904325878447317,0 +boolq,acc,0.5541284403669725,0.008693659886486843,1 +cb,acc,0.42857142857142855,0.06672848092813058,1 +cb,f1,0.30414746543778803,,1 +copa,acc,0.66,0.04760952285695237,0 +hellaswag,acc,0.28141804421429994,0.00448771884333028,0 +hellaswag,acc_norm,0.29934276040629354,0.004570342034463229,0 +piqa,acc,0.6376496191512514,0.011215040215104565,0 +piqa,acc_norm,0.6289445048966268,0.011271222398600525,0 +rte,acc,0.5487364620938628,0.029953149241808943,0 +sciq,acc,0.685,0.014696631960792496,0 +sciq,acc_norm,0.595,0.015531136990453047,0 +storycloze_2016,acc,0.5831106360235169,0.011401581234266751,0 +winogrande,acc,0.4988161010260458,0.014052446290529019,0 diff --git 
a/146m14b14b/evaluation/rankeval/146m14b14b_0.json b/146m14b14b/evaluation/rankeval/146m14b14b_0.json new file mode 100644 index 0000000000000000000000000000000000000000..247b1b4f2fbdbce7ac02c2376c81704fa8792b3e --- /dev/null +++ b/146m14b14b/evaluation/rankeval/146m14b14b_0.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.328, + "acc_stderr": 0.014853842487270333 + }, + "anli_r2": { + "acc": 0.325, + "acc_stderr": 0.014818724459095527 + }, + "anli_r3": { + "acc": 0.34, + "acc_stderr": 0.013680495725767789 + }, + "cb": { + "acc": 0.42857142857142855, + "acc_stderr": 0.06672848092813058, + "f1": 0.30414746543778803 + }, + "copa": { + "acc": 0.66, + "acc_stderr": 0.04760952285695237 + }, + "hellaswag": { + "acc": 0.28141804421429994, + "acc_stderr": 0.00448771884333028, + "acc_norm": 0.29934276040629354, + "acc_norm_stderr": 0.004570342034463229 + }, + "rte": { + "acc": 0.5487364620938628, + "acc_stderr": 0.029953149241808943 + }, + "winogrande": { + "acc": 0.4988161010260458, + "acc_stderr": 0.014052446290529019 + }, + "storycloze_2016": { + "acc": 0.5831106360235169, + "acc_stderr": 0.011401581234266751 + }, + "boolq": { + "acc": 0.5541284403669725, + "acc_stderr": 0.008693659886486843 + }, + "arc_easy": { + "acc": 0.4065656565656566, + "acc_stderr": 0.010079056419223503, + "acc_norm": 0.36952861952861954, + "acc_norm_stderr": 0.009904325878447317 + }, + "arc_challenge": { + "acc": 0.18430034129692832, + "acc_stderr": 0.011330517933037411, + "acc_norm": 0.23122866894197952, + "acc_norm_stderr": 0.012320858834772278 + }, + "sciq": { + "acc": 0.685, + "acc_stderr": 0.014696631960792496, + "acc_norm": 0.595, + "acc_norm_stderr": 0.015531136990453047 + }, + "piqa": { + "acc": 0.6376496191512514, + "acc_stderr": 0.011215040215104565, + "acc_norm": 0.6289445048966268, + "acc_norm_stderr": 0.011271222398600525 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + 
"storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/rankeval/146m14b14b_1.csv b/146m14b14b/evaluation/rankeval/146m14b14b_1.csv new file mode 100644 index 0000000000000000000000000000000000000000..bd9cecd5818ac78b1ff831ea8b0e84d541e2893c --- /dev/null +++ b/146m14b14b/evaluation/rankeval/146m14b14b_1.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.329,0.014865395385928362,0 +anli_r2,acc,0.324,0.014806864733738854,0 +anli_r3,acc,0.3325,0.013605417345710528,0 +arc_challenge,acc,0.18771331058020477,0.011411001314155117,0 +arc_challenge,acc_norm,0.23378839590443687,0.012368225378507137,0 +arc_easy,acc,0.39604377104377103,0.010035580962097935,0 +arc_easy,acc_norm,0.375,0.009933992677987828,0 +boolq,acc,0.5293577981651376,0.008729967580199218,1 +cb,acc,0.4642857142857143,0.06724777654937658,1 +cb,f1,0.3284421618977745,,1 +copa,acc,0.61,0.04902071300001974,0 +hellaswag,acc,0.27962557259510057,0.004478979795506768,0 +hellaswag,acc_norm,0.2967536347341167,0.00455893382299554,0 +piqa,acc,0.6240478781284005,0.011301098166895729,0 +piqa,acc_norm,0.6316648531011969,0.011254089354334357,0 +rte,acc,0.5379061371841155,0.030009848912529113,0 +sciq,acc,0.681,0.01474640486547349,0 +sciq,acc_norm,0.628,0.015292149942040577,0 +storycloze_2016,acc,0.5713522180652058,0.011444094780077097,0 +winogrande,acc,0.5019731649565904,0.014052376259225636,0 diff --git a/146m14b14b/evaluation/rankeval/146m14b14b_1.json b/146m14b14b/evaluation/rankeval/146m14b14b_1.json new file mode 100644 index 0000000000000000000000000000000000000000..7ca570ca69de2fb6d8024957bd6730f1d17e4f77 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/146m14b14b_1.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.329, + "acc_stderr": 0.014865395385928362 + }, + "anli_r2": { + "acc": 0.324, + "acc_stderr": 0.014806864733738854 + }, + "anli_r3": { + "acc": 0.3325, + 
"acc_stderr": 0.013605417345710528 + }, + "cb": { + "acc": 0.4642857142857143, + "acc_stderr": 0.06724777654937658, + "f1": 0.3284421618977745 + }, + "copa": { + "acc": 0.61, + "acc_stderr": 0.04902071300001974 + }, + "hellaswag": { + "acc": 0.27962557259510057, + "acc_stderr": 0.004478979795506768, + "acc_norm": 0.2967536347341167, + "acc_norm_stderr": 0.00455893382299554 + }, + "rte": { + "acc": 0.5379061371841155, + "acc_stderr": 0.030009848912529113 + }, + "winogrande": { + "acc": 0.5019731649565904, + "acc_stderr": 0.014052376259225636 + }, + "storycloze_2016": { + "acc": 0.5713522180652058, + "acc_stderr": 0.011444094780077097 + }, + "boolq": { + "acc": 0.5293577981651376, + "acc_stderr": 0.008729967580199218 + }, + "arc_easy": { + "acc": 0.39604377104377103, + "acc_stderr": 0.010035580962097935, + "acc_norm": 0.375, + "acc_norm_stderr": 0.009933992677987828 + }, + "arc_challenge": { + "acc": 0.18771331058020477, + "acc_stderr": 0.011411001314155117, + "acc_norm": 0.23378839590443687, + "acc_norm_stderr": 0.012368225378507137 + }, + "sciq": { + "acc": 0.681, + "acc_stderr": 0.01474640486547349, + "acc_norm": 0.628, + "acc_norm_stderr": 0.015292149942040577 + }, + "piqa": { + "acc": 0.6240478781284005, + "acc_stderr": 0.011301098166895729, + "acc_norm": 0.6316648531011969, + "acc_norm_stderr": 0.011254089354334357 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/rankeval/146m14b14b_2.csv b/146m14b14b/evaluation/rankeval/146m14b14b_2.csv new file mode 100644 index 0000000000000000000000000000000000000000..f97da07fe40efb898a3d8d94f937977676709077 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/146m14b14b_2.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version 
+anli_r1,acc,0.319,0.014746404865473493,0 +anli_r2,acc,0.337,0.0149550879186536,0 +anli_r3,acc,0.3375,0.013655897185463665,0 +arc_challenge,acc,0.18515358361774745,0.011350774438389699,0 +arc_challenge,acc_norm,0.22696245733788395,0.012240491536132872,0 +arc_easy,acc,0.39604377104377103,0.010035580962097937,0 +arc_easy,acc_norm,0.36153198653198654,0.009858506543162062,0 +boolq,acc,0.5434250764525994,0.008712010793695303,1 +cb,acc,0.44642857142857145,0.06703189227942397,1 +cb,f1,0.3011063011063011,,1 +copa,acc,0.54,0.05009082659620332,0 +hellaswag,acc,0.27853017327225654,0.004473595650807679,0 +hellaswag,acc_norm,0.2961561441943836,0.004556276293751938,0 +piqa,acc,0.6305767138193689,0.011260988628572347,0 +piqa,acc_norm,0.6311207834602829,0.011257546676908804,0 +rte,acc,0.5054151624548736,0.030094698123239966,0 +sciq,acc,0.693,0.014593284892852623,0 +sciq,acc_norm,0.635,0.015231776226264903,0 +storycloze_2016,acc,0.5777659005879209,0.011421727692385657,0 +winogrande,acc,0.5067087608524072,0.014051220692330352,0 diff --git a/146m14b14b/evaluation/rankeval/146m14b14b_2.json b/146m14b14b/evaluation/rankeval/146m14b14b_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f428f5ab1ba34c9b5d2d2cd95af439a4b0ce88c0 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/146m14b14b_2.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.319, + "acc_stderr": 0.014746404865473493 + }, + "anli_r2": { + "acc": 0.337, + "acc_stderr": 0.0149550879186536 + }, + "anli_r3": { + "acc": 0.3375, + "acc_stderr": 0.013655897185463665 + }, + "cb": { + "acc": 0.44642857142857145, + "acc_stderr": 0.06703189227942397, + "f1": 0.3011063011063011 + }, + "copa": { + "acc": 0.54, + "acc_stderr": 0.05009082659620332 + }, + "hellaswag": { + "acc": 0.27853017327225654, + "acc_stderr": 0.004473595650807679, + "acc_norm": 0.2961561441943836, + "acc_norm_stderr": 0.004556276293751938 + }, + "rte": { + "acc": 0.5054151624548736, + "acc_stderr": 0.030094698123239966 + }, + 
"winogrande": { + "acc": 0.5067087608524072, + "acc_stderr": 0.014051220692330352 + }, + "storycloze_2016": { + "acc": 0.5777659005879209, + "acc_stderr": 0.011421727692385657 + }, + "boolq": { + "acc": 0.5434250764525994, + "acc_stderr": 0.008712010793695303 + }, + "arc_easy": { + "acc": 0.39604377104377103, + "acc_stderr": 0.010035580962097937, + "acc_norm": 0.36153198653198654, + "acc_norm_stderr": 0.009858506543162062 + }, + "arc_challenge": { + "acc": 0.18515358361774745, + "acc_stderr": 0.011350774438389699, + "acc_norm": 0.22696245733788395, + "acc_norm_stderr": 0.012240491536132872 + }, + "sciq": { + "acc": 0.693, + "acc_stderr": 0.014593284892852623, + "acc_norm": 0.635, + "acc_norm_stderr": 0.015231776226264903 + }, + "piqa": { + "acc": 0.6305767138193689, + "acc_stderr": 0.011260988628572347, + "acc_norm": 0.6311207834602829, + "acc_norm_stderr": 0.011257546676908804 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/rankeval/146m14b14b_3.csv b/146m14b14b/evaluation/rankeval/146m14b14b_3.csv new file mode 100644 index 0000000000000000000000000000000000000000..da7e57a3c6a9921f72fa72e33ad623dde4de7f54 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/146m14b14b_3.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.337,0.014955087918653591,0 +anli_r2,acc,0.347,0.015060472031706618,0 +anli_r3,acc,0.3233333333333333,0.01350837286730022,0 +arc_challenge,acc,0.1825938566552901,0.011289730684565,0 +arc_challenge,acc_norm,0.2295221843003413,0.012288926760890797,0 +arc_easy,acc,0.3952020202020202,0.010031894052790978,0 +arc_easy,acc_norm,0.36826599326599324,0.009897286209010894,0 +boolq,acc,0.5614678899082569,0.008678720482001873,1 +cb,acc,0.375,0.06527912098338669,1 
+cb,f1,0.2554143126177024,,1 +copa,acc,0.63,0.04852365870939099,0 +hellaswag,acc,0.279326827325234,0.004477514681328155,0 +hellaswag,acc_norm,0.2956582354112727,0.0045540545376920125,0 +piqa,acc,0.6294885745375408,0.011267826475447665,0 +piqa,acc_norm,0.6322089227421109,0.01125061664667879,0 +rte,acc,0.5379061371841155,0.030009848912529117,0 +sciq,acc,0.686,0.014683991951087955,0 +sciq,acc_norm,0.635,0.015231776226264903,0 +storycloze_2016,acc,0.5750935328701229,0.011431286492205844,0 +winogrande,acc,0.5114443567482242,0.014048804199859325,0 diff --git a/146m14b14b/evaluation/rankeval/146m14b14b_3.json b/146m14b14b/evaluation/rankeval/146m14b14b_3.json new file mode 100644 index 0000000000000000000000000000000000000000..69096b5484b6b9de726044e569fff3cdbfb71c8f --- /dev/null +++ b/146m14b14b/evaluation/rankeval/146m14b14b_3.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.337, + "acc_stderr": 0.014955087918653591 + }, + "anli_r2": { + "acc": 0.347, + "acc_stderr": 0.015060472031706618 + }, + "anli_r3": { + "acc": 0.3233333333333333, + "acc_stderr": 0.01350837286730022 + }, + "cb": { + "acc": 0.375, + "acc_stderr": 0.06527912098338669, + "f1": 0.2554143126177024 + }, + "copa": { + "acc": 0.63, + "acc_stderr": 0.04852365870939099 + }, + "hellaswag": { + "acc": 0.279326827325234, + "acc_stderr": 0.004477514681328155, + "acc_norm": 0.2956582354112727, + "acc_norm_stderr": 0.0045540545376920125 + }, + "rte": { + "acc": 0.5379061371841155, + "acc_stderr": 0.030009848912529117 + }, + "winogrande": { + "acc": 0.5114443567482242, + "acc_stderr": 0.014048804199859325 + }, + "storycloze_2016": { + "acc": 0.5750935328701229, + "acc_stderr": 0.011431286492205844 + }, + "boolq": { + "acc": 0.5614678899082569, + "acc_stderr": 0.008678720482001873 + }, + "arc_easy": { + "acc": 0.3952020202020202, + "acc_stderr": 0.010031894052790978, + "acc_norm": 0.36826599326599324, + "acc_norm_stderr": 0.009897286209010894 + }, + "arc_challenge": { + "acc": 0.1825938566552901, + 
"acc_stderr": 0.011289730684565, + "acc_norm": 0.2295221843003413, + "acc_norm_stderr": 0.012288926760890797 + }, + "sciq": { + "acc": 0.686, + "acc_stderr": 0.014683991951087955, + "acc_norm": 0.635, + "acc_norm_stderr": 0.015231776226264903 + }, + "piqa": { + "acc": 0.6294885745375408, + "acc_stderr": 0.011267826475447665, + "acc_norm": 0.6322089227421109, + "acc_norm_stderr": 0.01125061664667879 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/rankeval/146m14b14b_4.csv b/146m14b14b/evaluation/rankeval/146m14b14b_4.csv new file mode 100644 index 0000000000000000000000000000000000000000..6c6dfeee5b9307ba454cb766cb94f2c4e59db2a1 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/146m14b14b_4.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.35,0.015090650341444236,0 +anli_r2,acc,0.32,0.01475865230357488,0 +anli_r3,acc,0.33416666666666667,0.013622434813136783,0 +arc_challenge,acc,0.19539249146757678,0.01158690718995291,0 +arc_challenge,acc_norm,0.22781569965870307,0.012256708602326903,0 +arc_easy,acc,0.39436026936026936,0.010028176038392995,0 +arc_easy,acc_norm,0.35395622895622897,0.00981237064417441,0 +boolq,acc,0.5675840978593272,0.008664798701065799,1 +cb,acc,0.44642857142857145,0.067031892279424,1 +cb,f1,0.30886196246139225,,1 +copa,acc,0.61,0.04902071300001975,0 +hellaswag,acc,0.2802230631348337,0.004481902637505665,0 +hellaswag,acc_norm,0.2960565624377614,0.00455583246277459,0 +piqa,acc,0.6311207834602829,0.011257546676908804,0 +piqa,acc_norm,0.6207834602829162,0.011320331012905077,0 +rte,acc,0.4548736462093863,0.029973636495415252,0 +sciq,acc,0.681,0.01474640486547349,0 +sciq,acc_norm,0.645,0.015139491543780532,0 +storycloze_2016,acc,0.569748797434527,0.011449379528209637,0 
+winogrande,acc,0.505130228887135,0.014051745961790516,0 diff --git a/146m14b14b/evaluation/rankeval/146m14b14b_4.json b/146m14b14b/evaluation/rankeval/146m14b14b_4.json new file mode 100644 index 0000000000000000000000000000000000000000..fb08e397b329842d8f3b31ea750915d72f7ce2de --- /dev/null +++ b/146m14b14b/evaluation/rankeval/146m14b14b_4.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.35, + "acc_stderr": 0.015090650341444236 + }, + "anli_r2": { + "acc": 0.32, + "acc_stderr": 0.01475865230357488 + }, + "anli_r3": { + "acc": 0.33416666666666667, + "acc_stderr": 0.013622434813136783 + }, + "cb": { + "acc": 0.44642857142857145, + "acc_stderr": 0.067031892279424, + "f1": 0.30886196246139225 + }, + "copa": { + "acc": 0.61, + "acc_stderr": 0.04902071300001975 + }, + "hellaswag": { + "acc": 0.2802230631348337, + "acc_stderr": 0.004481902637505665, + "acc_norm": 0.2960565624377614, + "acc_norm_stderr": 0.00455583246277459 + }, + "rte": { + "acc": 0.4548736462093863, + "acc_stderr": 0.029973636495415252 + }, + "winogrande": { + "acc": 0.505130228887135, + "acc_stderr": 0.014051745961790516 + }, + "storycloze_2016": { + "acc": 0.569748797434527, + "acc_stderr": 0.011449379528209637 + }, + "boolq": { + "acc": 0.5675840978593272, + "acc_stderr": 0.008664798701065799 + }, + "arc_easy": { + "acc": 0.39436026936026936, + "acc_stderr": 0.010028176038392995, + "acc_norm": 0.35395622895622897, + "acc_norm_stderr": 0.00981237064417441 + }, + "arc_challenge": { + "acc": 0.19539249146757678, + "acc_stderr": 0.01158690718995291, + "acc_norm": 0.22781569965870307, + "acc_norm_stderr": 0.012256708602326903 + }, + "sciq": { + "acc": 0.681, + "acc_stderr": 0.01474640486547349, + "acc_norm": 0.645, + "acc_norm_stderr": 0.015139491543780532 + }, + "piqa": { + "acc": 0.6311207834602829, + "acc_stderr": 0.011257546676908804, + "acc_norm": 0.6207834602829162, + "acc_norm_stderr": 0.011320331012905077 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + 
"cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/evaluation/rankeval/146m14b14b_5.csv b/146m14b14b/evaluation/rankeval/146m14b14b_5.csv new file mode 100644 index 0000000000000000000000000000000000000000..ca5d1c5f0fe98de13fd2e2e05f44130c923d19d3 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/146m14b14b_5.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.329,0.014865395385928355,0 +anli_r2,acc,0.322,0.014782913600996667,0 +anli_r3,acc,0.3516666666666667,0.013789711695404794,0 +arc_challenge,acc,0.17832764505119453,0.011186139406711289,0 +arc_challenge,acc_norm,0.21843003412969283,0.012074291605700983,0 +arc_easy,acc,0.4006734006734007,0.010055304474255558,0 +arc_easy,acc_norm,0.3611111111111111,0.009856013425811244,0 +boolq,acc,0.5626911314984709,0.008676043429497423,1 +cb,acc,0.5,0.06741998624632421,1 +cb,f1,0.34486817325800373,,1 +copa,acc,0.59,0.049431107042371025,0 +hellaswag,acc,0.2810197171878112,0.00448578446857668,0 +hellaswag,acc_norm,0.29645488946425014,0.004557606227194299,0 +piqa,acc,0.6245919477693145,0.011297839589776662,0 +piqa,acc_norm,0.6213275299238302,0.011317163404516854,0 +rte,acc,0.5306859205776173,0.030039730592197816,0 +sciq,acc,0.684,0.014709193056057125,0 +sciq,acc_norm,0.644,0.015149042659306623,0 +storycloze_2016,acc,0.5622661678246926,0.01147242507417594,0 +winogrande,acc,0.510655090765588,0.014049294536290393,0 diff --git a/146m14b14b/evaluation/rankeval/146m14b14b_5.json b/146m14b14b/evaluation/rankeval/146m14b14b_5.json new file mode 100644 index 0000000000000000000000000000000000000000..47a11babaeb6ba6405a85e773f92e0d604471be7 --- /dev/null +++ b/146m14b14b/evaluation/rankeval/146m14b14b_5.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.329, + "acc_stderr": 0.014865395385928355 + }, + "anli_r2": { + "acc": 0.322, + 
"acc_stderr": 0.014782913600996667 + }, + "anli_r3": { + "acc": 0.3516666666666667, + "acc_stderr": 0.013789711695404794 + }, + "cb": { + "acc": 0.5, + "acc_stderr": 0.06741998624632421, + "f1": 0.34486817325800373 + }, + "copa": { + "acc": 0.59, + "acc_stderr": 0.049431107042371025 + }, + "hellaswag": { + "acc": 0.2810197171878112, + "acc_stderr": 0.00448578446857668, + "acc_norm": 0.29645488946425014, + "acc_norm_stderr": 0.004557606227194299 + }, + "rte": { + "acc": 0.5306859205776173, + "acc_stderr": 0.030039730592197816 + }, + "winogrande": { + "acc": 0.510655090765588, + "acc_stderr": 0.014049294536290393 + }, + "storycloze_2016": { + "acc": 0.5622661678246926, + "acc_stderr": 0.01147242507417594 + }, + "boolq": { + "acc": 0.5626911314984709, + "acc_stderr": 0.008676043429497423 + }, + "arc_easy": { + "acc": 0.4006734006734007, + "acc_stderr": 0.010055304474255558, + "acc_norm": 0.3611111111111111, + "acc_norm_stderr": 0.009856013425811244 + }, + "arc_challenge": { + "acc": 0.17832764505119453, + "acc_stderr": 0.011186139406711289, + "acc_norm": 0.21843003412969283, + "acc_norm_stderr": 0.012074291605700983 + }, + "sciq": { + "acc": 0.684, + "acc_stderr": 0.014709193056057125, + "acc_norm": 0.644, + "acc_norm_stderr": 0.015149042659306623 + }, + "piqa": { + "acc": 0.6245919477693145, + "acc_stderr": 0.011297839589776662, + "acc_norm": 0.6213275299238302, + "acc_norm_stderr": 0.011317163404516854 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..85d7e8849756c6bcc2dca1a42597974450788229 --- /dev/null 
+++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d20867107a7af767363191527ab1178dc3cd83ec3b810a1ff4022e2db44f4b +size 27478295 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..67166c78477b7938ee1baf2cd386b9b86e32e44a --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6f5d7f894b3e6f26fd60e8b50c8f1a7f6526da0a0550feab30d0de52f2a268c +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..befb0c9f79e5d3dfdc28948b5bd9426c379a0d03 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5710490cdf05b6a669dfcd23b10e178ed163b47f4f8990d6818b60b907e21915 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..21b5327b111db2f95763aadee7cd487107ca1ca0 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:203ebaa716c7ff3ef6feda63dfcf8f1ef379dedcb510437d0752b808f74a4003 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 
index 0000000000000000000000000000000000000000..613b5d23e91f1abd2029cfac64ffeff3951a7764 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfd3b4ce1c4c348360d2af2597210851bc3369efe5dd30c8c8a6a2e3f901af17 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2b38c5c04001fe74a65ae0a3729dc133a020aaf --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91b6239e8d1d99b20b9e371d7b54bd707599734958a1a9bb704c8f26e1fa194 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e48757353a8fad9462ede8319b487967331e8daa --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:654c245bbd7aa1dafa00321a67564b683a3edb52d7d4baa3c16954590e0b50b8 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a14e57cefa92641ab264210a2f9982b797558bba --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f1be181462f8baf15699274b172d9903013a630dd9f7d107b7e764825dc65e +size 27478306 diff --git 
a/146m14b14b/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..28cda5f7599d8a17c4a72a8ec57e5587ae26c86c --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0edf16ba17c6e9fa225057b7a8af887041e949ef42236cefa7f527e0db3b410 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..424e39729894b597c25672e4158c7a4297b4f3d9 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c5a4c3467797fe70bca4c8ddc4f0f97152d8d966fb8bc2427e1b7036a7c2e2 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0383df9664a2e7396239e1ed6bf4f4c7802fe456 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d85eec3b7a2c41cf9fb95feef55597d0be64ef7cca953a8d68237f0097f28a4 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..005db8d337eed34d291d9a6810e8b7eef5c1e549 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0fcf04690af0dadd6e56ca842d82d73df0acee632b9a345c8333222f7967d01e +size 27478231 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b49bd3c8466069ebb4868c6b87c57c1009666b56 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4b99feb7f89ef602c0ce326463937b49fe4064c046311d6587ffa7ef95c6df4 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..823a34a2c4121a41684659b1d7df93d264ffdf21 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93557255cddddbafdf1394c76fca50d1483be0d0b63185b8b31619c0a679ad27 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..887ee2ab5e40cfc2acd663772dfecec786ae24ee --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35f887c28486a35197bf86a5a4762b716162219827ae65b9bca5d77784e81e98 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ddf4d2da748ad3a2cfb0fa57edbd1666fb39dc04 --- /dev/null +++ 
b/146m14b14b/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23c3199eb2c68d025f805993a8d9f96cef3ba724537d10255087e2125dea2e30 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ed2ed6c930745dd83a0eb4cfdfe149c27ac0c5c --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab1d65aa4523f7d14dbd9c5b58fb2541fb12c5cfccb5f9689e370b57c35ac407 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..94cec26a92000405eb8f72d0a8dba44a9d19a7a4 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed1f4ca74a2cf8889eb232c60c24d2335a387ab2dee5c03bb4b53540194d6628 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..79b450ac5c0d7118ac8093f6f177a58b64457cdd --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9cc1fda6a19f935b73956b8ea5aeabec1239a326fa76229baba14d3b26ee39c +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..7f12c8991d4e43d12d8958903d1ec6ed5e31a940 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cc6807670f7420995a4256648751a747ba742ce48f127dec59fd00fb4769de6 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f99f463e9e16b6ce04ec8858c36d8929bf9e8a1c --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1a774ccd4d158cd753fbe534b3a6e5863c9f8a07bab30bdff9fcd9385628c61 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5536f80a30d1e83268c440245450f376732672d --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bcd453772cc8bbf9296124c7682c4e9e8088e81339f3f8e00d9d4b9e4098c21 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..65d7164e3aba92f3815b9df90b574e1a719c2b7f --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1c504328e851a1aa0f9d0234eac5ae68594e7afb175937e4504388724da25d4 +size 27478231 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt 
b/146m14b14b/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4449591b8a4971ba51a2b9fd663bc23a77070919 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef9bd16e13fd6ab8f4f3e3e25a69aef724ea3a0bfbb5448a30c0fbd144ca124 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee54994fff5a70f6fcf86654b8ebe5e36648d2c6 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec55b48830bbe90b3673715e648a4c42da2c8ff5609e04a30cee5adeb654701 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd46b5ab1407de7eb79b6ed23da5795142acfda2 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54d9d0e45bbcd64cc08a1110474ec8b47f30676909ee9057f41bec2263331fd +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb5b7f8e7ac8360c3df3bc70646f6cbe7288e01e --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a3fd94bdb8efe6840114b5084070fc843bae1750bc373d64565b58778f5fb0b 
+size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cebd7ddf31104760dd3c587f72ae309917a718a --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14647fa23470aa7ba25d3b9194c1c98d0814e75dbdcc7f2d1a0bf4abbea22d63 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe5d208986107a45df30567dac194edfbd508854 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b295371cb1663f6a871517cbd77dd22cfa0dc68504f787d7bb3dad113f8f6c0a +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7033005cd6ba27070a423a6160ce03410a978e04 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dba331ee1d82bf41a7121a1f412cd7b36abf5aff087b3281402a3b82f3902cb +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1788fcb506b5d711aa95c71b56df3f14ca07c93 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:7b19617f3c1cd299f48026b84286d3b1600f1430597ec2ff7b882968c6e94570 +size 27478114 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08ac35731a3c1064aa154beccd12d96ffe8d653f --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ae223a9fcaa372f5f2b52b238a86c33812963666baf3448d69feaaccb3821db +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3037a4c9f9e9a285e1abd0db7cb3359f3c90ecbb --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d3e8a7b17651c615c8b1962c5750a33003dd1d882304efb6adc64124159b919 +size 27478434 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..49085682aefb2c87768b09f7cc2298705127bf62 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8444d69bae206de477aeed8d4bd59c21e45a4cc1050fe4d3bdad26b8258c2a60 +size 27478167 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..050d957c1dc85d883d734234b3bba00be6bc18bd --- /dev/null +++ 
b/146m14b14b/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3bae5b3e6f0790f31cfcb5ba81da820c8002c55c66c328b681b864de2745220 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..04ee2749b9bacb4d30326adff47a9e3a30126c3f --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9e98026fb391afdc0bb57fd79fd0127dfb9f07f5f8761035de2201241093741 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c2791c6c3eaa54eeb8be732496e8e4410eefcdf --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d51371428e688a0264b9bd28c93a2b379f2c0434686aa7e4002e2ff08d33fc +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7585bdd174c15d3d941470195a1b78541c45365 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c6f68c5d10e6fcfc1e96a760957d2d2185a08f9ee40d785b9aa87b2e6894352 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..08c446aeab3494c28e60779462308896ad1945ba --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c985187d070f6ebe51fc25d005474f837c20d86943f8e9641a8151622d5d24e +size 27478434 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d882262986c1a74840ec6f7d93df3e501bc49a4 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:894813142cf8eb8c9c414a864702581b23d5c4ccbdf7008f19135d0d182a12b5 +size 27478050 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b6d63c971d631eddd445641876a36771b04b009 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84fb022d0bb62c843543bd83862599ad7349159e985b2628f3eca9b25defda3b +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..70fc7bb5770fe6c22189dee0e50789a3173e0919 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f6837c386410cd805e1763f56597ed330c8c5d49e782f11557bc2da8c30df2 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt 
b/146m14b14b/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..261e105fd2df4b2b75ae7d6a2c7899661bd61e6c --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60b8466e26b6484ac5d94c16f62f936ac0bcdcbb9679711c42747ee4dd4cda1 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec5a7ac0a2f426b4e5d7ecb0543341a9988bd4bb --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdd407d4595a5c922cf704523c00fc9c4da700cb378fde0d4a684dcf1a055a1c +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7d3d88a934246ffbaaf32bba1d15bc3a698fe56 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a929a57fd6e56b1a09880490bf94b061ac109fe0a6a40362b955ba7e85b48dd3 +size 27478231 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..aaab0191327ff2836f1130d3d0399ea199042180 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01dee131e28aa2efd427c028ac5cbfa6f3d3a17d8139d70a4121129a459f2c0a +size 
27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5435a791229b8fbdbff9f8b46f734b99303070cd --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:349446d20dfa622c0e5144e507486c9d6014c390b93d18b893cf56ff7551d18d +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1c7e933aa48df8f08a809e00cd71393cbb8f44a --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08d16e187a1d90ac44fcfa40f8b9e1a0a520f93db7b07022096bd5b83ea1fd20 +size 27478434 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ad07c438eb78fb8211add02e54b23c03ff2473a --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb964f21ea3fe4c5eccd119ef084d2009f999cb2954e6c6ea1e1f36664b0d9d3 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c34d25bd9e85bc3d5bbb4936b5a3081f00dd4ef5 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:095173b3a1efa513ae9373765052b94d818eb98a6518dfa9186b430314b1603a +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..978f272bed94faaeee6d5590dcc81cacc2f4f9ef --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6939c87724545f2acd019349dcb022e30a07199629fcde8fa0a92a4231991645 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd3f9a2e91c732f4508f2f07af2275c83cb38293 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a249fba695596ff9c2db00056d1c99e6032adcdc88830e59e5ee4ddd9921d555 +size 27478306 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..97c191471af4100b2989a5e4624036ada9ff8019 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd2adc6a2e6e47d4f9804e23a175af53b86c37b7f30aa5f208d276fc7e747589 +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3095884373dac484f69c1f59dce19722bf36d5e5 --- /dev/null +++ 
b/146m14b14b/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eebcb85c9584a1128a417081b2c8a25b0f94f6c62bc122c27684715bd4c56ac0 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0708e05aba01ef817e28566e0f23e99feff5c2ec --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c2cefc7b1b85da4fdb77e7a16e018693a440bb864cc706ae8fe68802d56c9fb +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7467241589f0ef32ed75fd37304f040ac2f9627d --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03a493aae2097681204776425fed6f6d3aadcf89a6904d4d57a3546f483bb095 +size 27478167 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2120f922b028dd75faaa772c2859439d48341f5 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff62de0a11c983b915d49c24429a0ffd35a2aaafe44fa2e7eba3334837fbdf67 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..f633f9d2603d94427ac356b3a55d344ca2cfe3b3 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5044e7906b16843d20219fefd6a47706a766482b55644bf80aef5ad98a5ad3be +size 27478370 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b9552828ce5cddc2bbd8782fcf9fe07df66b1c4 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d348f208be0f5310e67f021f48522cb87bf57a0921de0abf4f5d8c94c6fec7 +size 27478178 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e0a8071e0b755aa7c24228f1184372397bfd419 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55cabe1836a67afe56d081e62220ff882a3f79806e10af3e4fba4a4ef82ff728 +size 27478242 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b2e1d4f6b861ef0289a7e255262324d171e6623 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49bd7cde24e6616a1a21dfcde8cc3da2a0b021979e3d28e6bc6f12af61d161b5 +size 27478359 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt 
b/146m14b14b/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7015f1d3bb0100813009329d68dbf2864867ea17 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e06cb37ea4597c121c03eccd9c90b85957dddc7ffc2f7141a269376a2bd42879 +size 27478103 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..414db8daedb8265203ab8f386ef1c3d10669f507 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c7843f315b34ea2df1b2254522c2a5a59cc54b69d2da058681d77b41d8097e3 +size 27478359 diff --git a/146m14b14b/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/146m14b14b/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b41c8de1d728fcebd57fd8aeaa0694fac348c035 --- /dev/null +++ b/146m14b14b/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d1f4edfcdebf4e7eb85d1731068a6ab62d265f4179fde6293832b12972cd9fa +size 27478167 diff --git a/146m14b14b/global_step21553/layer_01-model_00-model_states.pt b/146m14b14b/global_step21553/layer_01-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a54ea58086ae6291098053ed5bc9cc4444f9c4f --- /dev/null +++ b/146m14b14b/global_step21553/layer_01-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abad36f104b866ef90d59792d5fc432538b2dcd8f0695fc9d10285c0852639c7 +size 80413955 diff --git 
a/146m14b14b/global_step21553/layer_03-model_00-model_states.pt b/146m14b14b/global_step21553/layer_03-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9c0d27d9916567c7a054a48f21ebf379d1b26fa --- /dev/null +++ b/146m14b14b/global_step21553/layer_03-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca891e7c3f7698f29e1d88d325ba1ec5d5bb7b9ecaa90d1e2c65fd643211485a +size 14180099 diff --git a/146m14b14b/global_step21553/layer_04-model_00-model_states.pt b/146m14b14b/global_step21553/layer_04-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..beb6cf73145d85246c8212654f161ad5de3125f5 --- /dev/null +++ b/146m14b14b/global_step21553/layer_04-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f93f68e3f3d137a504b0b8d3de47d39c938c94a84cbd6d896a9b8fae34fbc8 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_05-model_00-model_states.pt b/146m14b14b/global_step21553/layer_05-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..87a7d3da5b19d74741b3f592072b912f8f755f90 --- /dev/null +++ b/146m14b14b/global_step21553/layer_05-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e77f3e7a3d1409d82b937ecee55a6f1078a2ce3b55548ce53c79edeb1dd3e5e +size 14180099 diff --git a/146m14b14b/global_step21553/layer_06-model_00-model_states.pt b/146m14b14b/global_step21553/layer_06-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b7aeeed02d1c7facecab7a7e098d49b381a1aac --- /dev/null +++ b/146m14b14b/global_step21553/layer_06-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c501ade13b5bb4cbdf6bdd005f6162fba119bb1f0a89f52351d248ac9ecfe1b0 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_07-model_00-model_states.pt 
b/146m14b14b/global_step21553/layer_07-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..73c16808120db2a0a1db206487aa958ecffff01b --- /dev/null +++ b/146m14b14b/global_step21553/layer_07-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20225362738413f99e2829b85ecaeac7ec5a915e7d75c4b8c224115c380e92a7 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_08-model_00-model_states.pt b/146m14b14b/global_step21553/layer_08-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..953514196fe39a90e6cc46e2ee53b56ef9a2db52 --- /dev/null +++ b/146m14b14b/global_step21553/layer_08-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e858434cf962b554bdefd5dcd096891b2a47151bd6fb8be43ff109ff1db8fbf +size 14180099 diff --git a/146m14b14b/global_step21553/layer_09-model_00-model_states.pt b/146m14b14b/global_step21553/layer_09-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..de94c415907ec41131bbb6f47f041cb371d5125c --- /dev/null +++ b/146m14b14b/global_step21553/layer_09-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e989a8f8030b349f65abfdcda0484797c927009d0bf3d41f0c484cb6ce574081 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_10-model_00-model_states.pt b/146m14b14b/global_step21553/layer_10-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3f36f8c11bd97155d33855c6d26846a46ffc42d --- /dev/null +++ b/146m14b14b/global_step21553/layer_10-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abe7bb3684ef7cbfbee982015433a6d99351076fdd776b1d9d65ab23ad798e1b +size 14180099 diff --git a/146m14b14b/global_step21553/layer_11-model_00-model_states.pt b/146m14b14b/global_step21553/layer_11-model_00-model_states.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..c87a9b1d392bf6276721477ed460e68ef527562f --- /dev/null +++ b/146m14b14b/global_step21553/layer_11-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de7fc4420929926d8683863fae8bc77ce3f2a1da88f0603dfdf7305c016ab8c9 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_12-model_00-model_states.pt b/146m14b14b/global_step21553/layer_12-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..acc0811ca222184a964f0d6c54e48180c7f5a424 --- /dev/null +++ b/146m14b14b/global_step21553/layer_12-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5000e853fbf4f4fd26898f940f1ea8fcba0c2717ebebb11d47cd7f8cfdbb166 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_13-model_00-model_states.pt b/146m14b14b/global_step21553/layer_13-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b84787679f6a7fc2bcad269978897056ddb8653d --- /dev/null +++ b/146m14b14b/global_step21553/layer_13-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec75c2499ce50a7804db290818e64252fd826abe87176f81c7f9fe049d363f2 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_14-model_00-model_states.pt b/146m14b14b/global_step21553/layer_14-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..162d6ed8132eeb126a8b6ebdd99e30e69119bf90 --- /dev/null +++ b/146m14b14b/global_step21553/layer_14-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f7ec98a26919097d2509f031e08b79fe0a163b231dc99cae8541194b7392f5 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_15-model_00-model_states.pt b/146m14b14b/global_step21553/layer_15-model_00-model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..2fb27983f2635a299e2f45cee7d76ea0a07407c2 --- /dev/null +++ b/146m14b14b/global_step21553/layer_15-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:302d70845d3e4b9f6b0a2dd2e4d7e8b1e0cedf2ee0a1b12cfb937d6eddfa5bf1 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_16-model_00-model_states.pt b/146m14b14b/global_step21553/layer_16-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..78d7a01ac08d9fe0fbb013266de6825d9dbf28b4 --- /dev/null +++ b/146m14b14b/global_step21553/layer_16-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8928e63e2df46b322cbe7c27b9d40e01c6241e03db035ff077eccbe9ca8bf60 +size 14180099 diff --git a/146m14b14b/global_step21553/layer_17-model_00-model_states.pt b/146m14b14b/global_step21553/layer_17-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a98bdcb971b56d2087eba6b08a7cf9093802b86 --- /dev/null +++ b/146m14b14b/global_step21553/layer_17-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63064100f3755797efc70bc115a2076a40251b8b2024ec5d213ef3fda449b1ef +size 14180099 diff --git a/146m14b14b/global_step21553/layer_19-model_00-model_states.pt b/146m14b14b/global_step21553/layer_19-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9846cae738c793baecc18d1cc894cf1e026fb40e --- /dev/null +++ b/146m14b14b/global_step21553/layer_19-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8427aa4337eb419d40fbc4ab513d40ff628f480a7fa86dda5100f3af27f413c +size 4291 diff --git a/146m14b14b/global_step21553/mp_rank_00_model_states.pt b/146m14b14b/global_step21553/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d8aa89969c12c1fa71fe7775b7e95ac8691fc4e 
--- /dev/null +++ b/146m14b14b/global_step21553/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b005093dcfa243f42677fbc280149d73dbb5cd1610c7549a301fd856ad7e18d1 +size 35443 diff --git a/146m14b14b/logs/2820868.err b/146m14b14b/logs/2820868.err new file mode 100644 index 0000000000000000000000000000000000000000..9a12909355161609334062dc977d680b20a38c25 --- /dev/null +++ b/146m14b14b/logs/2820868.err @@ -0,0 +1,1112 @@ +2: 2023-02-09 22:43:16.802685: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:43:16.802690: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:43:16.802698: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:43:16.802691: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+2: 2023-02-09 22:43:16.802698: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:43:16.802697: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:16.803044: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:16.803047: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:16.803060: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: 2023-02-09 22:43:16.802742: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+2: 2023-02-09 22:43:16.802762: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:16.803083: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:16.803088: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:16.803093: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:16.803078: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+4: 2023-02-09 22:43:16.803114: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:16.803614: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:16.803622: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:16.803627: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:16.803644: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+5: 2023-02-09 22:43:16.803646: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:16.803643: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:43:16.803912: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:16.803643: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:16.803670: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+6: 2023-02-09 22:43:16.804137: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:16.804145: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:16.804162: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: 2023-02-09 22:43:16.803990: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:43:16.804005: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+6: 2023-02-09 22:43:16.804174: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:16.804175: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:43:16.804033: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:16.804188: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:16.804203: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+3: 2023-02-09 22:43:16.804059: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:43:16.804064: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:16.804226: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:43:16.804075: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:43:16.804124: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+1: 2023-02-09 22:43:16.804783: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:43:16.804794: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:43:16.804796: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:43:16.804830: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:43:16.804845: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+1: 2023-02-09 22:43:16.804878: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:43:16.804887: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:43:16.804950: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:16.805173: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:16.805165: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-02-09 22:43:16.805178: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:16.805260: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:16.805265: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:16.805267: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:16.805257: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-02-09 22:43:16.805253: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:43:16.805521: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:43:16.805532: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:43:16.805521: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:43:16.805557: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+0: 2023-02-09 22:43:16.805567: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:43:16.805586: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:43:16.805599: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:43:16.805623: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+6: 2023-02-09 22:43:18.395659: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-09 22:43:18.395744: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:18.395663: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-09 22:43:18.395751: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:18.395657: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-09 22:43:18.395751: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:18.395664: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-09 22:43:18.395754: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:18.395671: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-09 22:43:18.395760: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:18.395679: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-09 22:43:18.395758: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:18.395674: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-09 22:43:18.395762: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:18.395667: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 2023-02-09 22:43:18.395758: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:18.395958: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:18.395964: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:18.395966: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:18.395967: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:18.396083: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:18.396086: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:18.396089: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:18.396091: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+6: 2023-02-09 22:43:18.396089: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:18.395968: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:18.395972: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:18.395974: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:18.395977: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:18.396100: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:18.396098: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:18.396099: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+3: 2023-02-09 22:43:18.437107: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:18.437115: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:18.437114: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:18.437124: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:18.437124: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:18.437125: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:18.437128: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:18.437118: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:18.437316: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:18.437318: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:18.437318: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:18.437320: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:18.437324: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+3: 2023-02-09 22:43:18.437323: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:18.437328: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:18.437332: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:18.438980: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:18.438984: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:18.438989: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:18.438998: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:18.438997: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:18.438997: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:18.438996: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:18.439002: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:18.439620: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:18.439621: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:18.439624: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:18.439626: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:18.439627: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+4: 2023-02-09 22:43:18.439629: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:18.439631: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:18.439636: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:18.440255: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:18.440250: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:18.440258: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:18.440261: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:18.440267: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:18.440263: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:18.440264: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:18.440263: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:18.440658: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:18.440660: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:18.440665: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:18.440665: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:18.440666: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+1: 2023-02-09 22:43:18.440670: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:18.440671: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:18.440674: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:18.449269: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:18.449264: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:18.449274: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:18.449276: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:18.449275: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:18.449278: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:18.449275: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:18.449276: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:18.449808: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:18.449811: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:18.449814: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:18.449816: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:18.449817: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+2: 2023-02-09 22:43:18.449819: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:18.449820: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:18.449825: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:18.457513: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:18.457512: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:18.457519: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:18.457520: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:18.457524: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:18.457531: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:18.457520: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:18.457524: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:18.457863: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:18.457867: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:18.457865: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:18.457869: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:18.457871: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+7: 2023-02-09 22:43:18.457873: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:18.457871: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:18.457877: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:18.464217: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:18.464216: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:18.464230: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:18.464222: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:18.464227: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:18.464237: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:18.464227: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:18.464229: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:18.464436: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:18.464438: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:18.464442: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:18.464442: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:18.464444: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+5: 2023-02-09 22:43:18.464446: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:18.464449: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:18.464452: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:31.785111: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.785135: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No 
such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.785142: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.785154: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.785162: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.785175: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.785181: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.785185: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.785923: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.785971: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.785954: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.785989: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.785966: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.786000: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.785992: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.786009: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.786000: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.786017: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.786009: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.786022: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.786013: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.786026: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.786031: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.786037: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.786555: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.786574: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.786588: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.786608: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.786606: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.786613: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.786615: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.786625: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.786898: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.786927: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.786954: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.787223: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:43:31.786974: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.786988: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.786995: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.787225: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:43:31.786998: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.787227: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.787244: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-09 22:43:31.787229: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.786999: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-09 22:43:31.787230: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.787273: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-09 22:43:31.787232: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.787295: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.787307: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.787239: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.787242: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:43:31.787242: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:43:31.787248: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.787323: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-09 22:43:31.787247: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:43:31.787250: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.787289: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-09 22:43:31.787327: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.787292: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-09 22:43:31.787343: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.787304: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:43:31.787307: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+6: 2023-02-09 22:43:31.787346: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787719: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787750: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787748: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787770: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787788: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787787: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787794: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787811: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.788602: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.788604: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.788604: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.788611: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-09 22:43:31.788872: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.788611: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.788878: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; 
LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-09 22:43:31.788613: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.788618: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.788618: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:43:31.788624: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+2: 2023-02-09 22:43:31.788877: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-09 22:43:31.788612: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.788888: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.788880: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-09 22:43:31.788611: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.788629: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:43:31.788631: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+2: 2023-02-09 22:43:31.788881: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-09 22:43:31.788631: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.788635: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:43:31.788636: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.788880: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.788880: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.788896: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.788897: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.788900: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.788902: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.788903: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.788903: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.788921: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.788940: W 
tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.789626: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789696: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:43:31.789667: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-09 22:43:31.789630: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789697: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.789631: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.789674: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789704: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.789633: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.789669: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789707: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.789634: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789718: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+4: 2023-02-09 22:43:31.789675: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789707: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.789636: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789712: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:43:31.789713: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+4: 2023-02-09 22:43:31.789672: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.789645: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: 2023-02-09 22:43:31.789709: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.789646: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.789646: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.789651: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789726: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:43:31.789729: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.789672: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-09 22:43:31.789652: W 
tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.789654: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:43:31.789730: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.789667: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-09 22:43:31.789778: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:43:31.789672: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789792: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.789671: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-09 22:43:31.789789: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:43:31.789672: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789803: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.789693: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.789693: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.789698: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.789701: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.789693: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.789696: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.789696: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.789697: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.789699: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.789699: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+1: 2023-02-09 22:43:31.808621: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.808652: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.808687: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.808675: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.808686: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.808698: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.808708: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.808734: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.811717: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.811718: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.811720: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.811722: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.811720: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.811723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.811723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.811723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.811734: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:43:31.811735: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:43:31.811739: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:43:31.811741: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:43:31.811742: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:43:31.811743: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:43:31.811746: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:43:31.811746: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: 2023-02-09 22:43:31.788255: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788259: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788260: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788261: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788260: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788265: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788272: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788276: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788278: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788280: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788281: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788281: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788329: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788329: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788343: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788345: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: Loading extension module scaled_upper_triang_masked_softmax_cuda... +0: Successfully preprocessed all matching files. 
+0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module fused_mix_prec_layer_norm_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module fused_mix_prec_layer_norm_cuda... +3: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +4: Successfully preprocessed all matching files. +4: Successfully preprocessed all matching files. 
+6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +5: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +2: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +4: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +0: Building extension module utils... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +2: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: +5: +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Loading extension module utils... +4: Loading extension module utils... +7: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +2: Loading extension module utils... +7: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... 
+2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Loading extension module utils... +5: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: No modifications detected for re-loaded extension module utils, skipping build step... 
+4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... 
+7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +7: +7: Loading extension module utils... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +1: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +1: +1: Loading extension module utils...Loading extension module utils... +1: +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... 
+1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +5: +5: Loading extension module utils...Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +5: +5: +5: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +5: +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... 
+6: +6: Loading extension module utils...Loading extension module utils... +6: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... 
+3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +2: +2: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +2: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +2: +2: +2: Loading extension module utils... +2: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings +0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/146m14b14b/logs/2820868.out b/146m14b14b/logs/2820868.out new file mode 100644 index 0000000000000000000000000000000000000000..8c9c8f26d033bf8873cb80fbd0184e4e5b63a1ea --- /dev/null +++ b/146m14b14b/logs/2820868.out @@ -0,0 +1,5653 @@ +Model parameters: d_model 768 ffw_size 3072 kv_size 64 n_heads 12 n_layers 15 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 1 --pipeline-model-parallel-size 1 --num-layers 15 --hidden-size 768 --num-attention-heads 12 --kv-channels 64 --ffn-hidden-size 3072 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 4 --global-batch-size 256 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --clip-grad 1.0 --kill-switch-path kill-switch-146m14b14bval --bf16 --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 --lr-warmup-samples 0 --clip-grad 1.0 --weight-decay 1e-1 --no-load-optim --reset-progress --override-lr-scheduler --log-interval 10 --save-interval 1000 --eval-interval 1 --eval-iters 100 --eval-only true --tensorboard-dir tensorboard_146m14b14bval --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save checkpoints_146m14b14b --load checkpoints_146m14b14b --train-weighted-split-paths-path train14b.txt --valid-weighted-split-paths-path val.txt --data-impl mmap 
--deepspeed --deepspeed_config ds_configs/2820868.json --zero-stage 0 +START 2820868: Thu 09 Feb 2023 10:42:29 PM EET +0: +0: +0: ======================= ROCm System Management Interface ======================= +0: ================================= Concise Info ================================= +0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +0: 0 44.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 1 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 2 41.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 4 44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 6 43.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: ================================================================================ +0: ============================= End of ROCm SMI Log ============================== +1: +1: +1: ======================= ROCm System Management Interface ======================= +1: ================================= Concise Info ================================= +1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +1: 0 41.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 1 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 2 39.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 3 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 4 41.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 6 44.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 7 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: ================================================================================ +1: ============================= End of ROCm SMI Log ============================== +6: +6: +6: ======================= ROCm System Management Interface ======================= +6: ================================= Concise Info ================================= +6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +6: 
0 47.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 2 42.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 4 40.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 6 42.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: ================================================================================ +6: ============================= End of ROCm SMI Log ============================== +3: +3: +3: ======================= ROCm System Management Interface ======================= +3: ================================= Concise Info ================================= +3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +3: 0 47.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 2 36.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 4 48.0c 81.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 6 40.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: ================================================================================ +3: ============================= End of ROCm SMI Log ============================== +2: +2: +2: ======================= ROCm System Management Interface ======================= +2: ================================= Concise Info ================================= +2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +2: 0 44.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 2 45.0c 81.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 4 44.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 6 38.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 
0% 0% +2: 7 39.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: ================================================================================ +2: ============================= End of ROCm SMI Log ============================== +7: +7: +7: ======================= ROCm System Management Interface ======================= +7: ================================= Concise Info ================================= +7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +7: 0 46.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 2 41.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 4 43.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 5 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 6 41.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: ================================================================================ +7: ============================= End of ROCm SMI Log ============================== +5: +5: +5: ======================= ROCm System Management Interface ======================= +5: ================================= Concise Info ================================= +5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +5: 0 44.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 2 44.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 3 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 4 45.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 5 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 6 42.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: ================================================================================ +5: ============================= End of ROCm SMI Log ============================== +4: +4: +4: ======================= ROCm System Management Interface ======================= +4: ================================= 
Concise Info ================================= +4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +4: 0 39.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 2 37.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 4 44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 6 44.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: ================================================================================ +4: ============================= End of ROCm SMI Log ============================== +4: Launching on nid006909 (4/8), master nid006905 port 9999, GPUs 8, CUDA: True +2: Launching on nid006907 (2/8), master nid006905 port 9999, GPUs 8, CUDA: True +6: Launching on nid006911 (6/8), master nid006905 port 9999, GPUs 8, CUDA: True +1: Launching on nid006906 (1/8), master nid006905 port 9999, GPUs 8, CUDA: True +5: Launching on nid006910 (5/8), master nid006905 port 9999, GPUs 8, CUDA: True +0: Launching on nid006905 (0/8), master nid006905 port 9999, GPUs 8, CUDA: True +3: Launching on nid006908 (3/8), master nid006905 port 9999, GPUs 8, CUDA: True +7: Launching on nid006912 (7/8), master nid006905 port 9999, GPUs 8, CUDA: True +0: using world size: 64, data-parallel-size: 64, tensor-model-parallel size: 1, pipeline-model-parallel size: 1 +0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. +0: using torch.bfloat16 for parameters ... +0: ------------------------ arguments ------------------------ +0: abort_on_unmet_fused_kernel_constraints ......... False +0: accumulate_allreduce_grads_in_fp32 .............. True +0: adam_beta1 ...................................... 0.9 +0: adam_beta2 ...................................... 0.999 +0: adam_eps ........................................ 1e-08 +0: adlr_autoresume ................................. 
False +0: adlr_autoresume_interval ........................ 1000 +0: apply_query_key_layer_scaling ................... True +0: apply_residual_connection_post_layernorm ........ False +0: attention_dropout ............................... 0.1 +0: attention_softmax_in_fp32 ....................... False +0: bert_binary_head ................................ True +0: bert_load ....................................... None +0: bf16 ............................................ True +0: bias_dropout_fusion ............................. True +0: bias_gelu_fusion ................................ True +0: biencoder_projection_dim ........................ 0 +0: biencoder_shared_query_context_model ............ False +0: block_data_path ................................. None +0: checkpoint_activations .......................... False +0: checkpoint_in_cpu ............................... False +0: checkpoint_num_layers ........................... 1 +0: clip_grad ....................................... 1.0 +0: codecarbon_dir .................................. None +0: consumed_train_samples .......................... 0 +0: consumed_train_tokens ........................... 0 +0: consumed_valid_samples .......................... 0 +0: contigious_checkpointing ........................ False +0: cpu_optimizer ................................... False +0: cpu_torch_adam .................................. False +0: curriculum_learning ............................. False +0: data_impl ....................................... mmap +0: data_parallel_size .............................. 64 +0: data_path ....................................... None +0: dataloader_type ................................. single +0: DDP_impl ........................................ local +0: decoder_seq_length .............................. None +0: deepscale ....................................... False +0: deepscale_config ................................ 
None +0: deepspeed ....................................... True +0: deepspeed_activation_checkpointing .............. False +0: deepspeed_config ................................ ds_configs/2820868.json +0: deepspeed_mpi ................................... False +0: distribute_checkpointed_activations ............. False +0: distributed_backend ............................. nccl +0: embed_layernorm ................................. False +0: embedding_path .................................. None +0: encoder_seq_length .............................. 2048 +0: eod_mask_loss ................................... False +0: eval_interval ................................... 1 +0: eval_iters ...................................... 100 +0: eval_only ....................................... True +0: evidence_data_path .............................. None +0: exit_duration_in_mins ........................... None +0: exit_interval ................................... None +0: ffn_hidden_size ................................. 3072 +0: finetune ........................................ False +0: fp16 ............................................ False +0: fp16_lm_cross_entropy ........................... False +0: fp32_residual_connection ........................ False +0: gigaflos_no_embeds .............................. 0 +0: global_batch_size ............................... 256 +0: glu_activation .................................. None +0: hidden_dropout .................................. 0.1 +0: hidden_size ..................................... 768 +0: hysteresis ...................................... 2 +0: ict_head_size ................................... None +0: ict_load ........................................ None +0: img_dim ......................................... 224 +0: indexer_batch_size .............................. 128 +0: indexer_log_interval ............................ 1000 +0: inference ....................................... 
False +0: init_method_std ................................. 0.02 +0: init_method_xavier_uniform ...................... False +0: initial_loss_scale .............................. 4294967296 +0: kill_switch_path ................................ kill-switch-146m14b14bval +0: kv_channels ..................................... 64 +0: layer_norm_fusion ............................... True +0: layernorm_epsilon ............................... 1e-05 +0: lazy_mpu_init ................................... None +0: load ............................................ checkpoints_146m14b14b +0: local_rank ...................................... None +0: log_batch_size_to_tensorboard ................... True +0: log_interval .................................... 10 +0: log_learning_rate_to_tensorboard ................ True +0: log_level ....................................... None +0: log_level_replica ............................... None +0: log_loss_scale_to_tensorboard ................... True +0: log_num_zeros_in_grad ........................... False +0: log_params_norm ................................. False +0: log_path ........................................ None +0: log_timers_to_tensorboard ....................... True +0: log_validation_ppl_to_tensorboard ............... True +0: loss_on_targets_only ............................ False +0: loss_scale ...................................... None +0: loss_scale_window ............................... 1000 +0: lr .............................................. 0.0002 +0: lr_decay_iters .................................. None +0: lr_decay_samples ................................ 1 +0: lr_decay_style .................................. cosine +0: lr_decay_tokens ................................. None +0: lr_warmup_fraction .............................. None +0: lr_warmup_iters ................................. 0 +0: lr_warmup_samples ............................... 0 +0: make_vocab_size_divisible_by .................... 
128 +0: mask_prob ....................................... 0.15 +0: masked_softmax_fusion ........................... True +0: max_position_embeddings ......................... 2048 +0: mean_noise_span_length .......................... None +0: memory_centric_tiled_linear ..................... False +0: merge_file ...................................... gpt2/merges.txt +0: micro_batch_size ................................ 4 +0: min_loss_scale .................................. 1.0 +0: min_lr .......................................... 2e-05 +0: mmap_warmup ..................................... False +0: no_load_optim ................................... True +0: no_load_rng ..................................... None +0: no_save_optim ................................... None +0: no_save_rng ..................................... None +0: noise_density ................................... None +0: num_attention_heads ............................. 12 +0: num_channels .................................... 3 +0: num_classes ..................................... 1000 +0: num_layers ...................................... 15 +0: num_layers_per_virtual_pipeline_stage ........... None +0: num_workers ..................................... 2 +0: onnx_safe ....................................... None +0: openai_gelu ..................................... False +0: optimizer ....................................... adam +0: optimizer_fusion ................................ True +0: override_lr_scheduler ........................... True +0: pad_vocab_size_to ............................... None +0: params_dtype .................................... torch.bfloat16 +0: partition_activations ........................... False +0: patch_dim ....................................... 16 +0: pipeline_model_parallel_size .................... 1 +0: position_embedding_type ......................... PositionEmbeddingType.absolute +0: pp_partition_method ............................. 
None +0: profile_backward ................................ False +0: query_in_block_prob ............................. 0.1 +0: rampup_batch_size ............................... None +0: rank ............................................ 0 +0: remote_device ................................... none +0: reset_attention_mask ............................ False +0: reset_position_ids .............................. False +0: reset_progress .................................. True +0: retriever_report_topk_accuracies ................ [] +0: retriever_score_scaling ......................... False +0: retriever_seq_length ............................ 256 +0: reweight_loss_based_on_position_frequency ....... False +0: sample_rate ..................................... 1.0 +0: save ............................................ checkpoints_146m14b14b +0: save_interval ................................... 1000 +0: scatter_gather_tensors_in_pipeline .............. True +0: scattered_embeddings ............................ False +0: seed ............................................ 1234 +0: seq_length ...................................... 2048 +0: sgd_momentum .................................... 0.9 +0: short_seq_prob .................................. 0.1 +0: skip_train_iteration_range ...................... None +0: split ........................................... None +0: split_transformers .............................. False +0: sync_tp_duplicated_parameters ................... False +0: synchronize_each_layer .......................... False +0: tensor_model_parallel_size ...................... 1 +0: tensorboard_dir ................................. tensorboard_146m14b14bval +0: tensorboard_log_interval ........................ 1 +0: tensorboard_queue_size .......................... 5 +0: test_weighted_split_paths ....................... None +0: test_weighted_split_paths_path .................. None +0: tile_factor ..................................... 
1 +0: titles_data_path ................................ None +0: tokenizer_name_or_path .......................... None +0: tokenizer_type .................................. GPT2BPETokenizer +0: train_iters ..................................... None +0: train_samples ................................... 1 +0: train_tokens .................................... None +0: train_weighted_split_names ...................... ['train'] +0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document']] +0: train_weighted_split_paths_path ................. None +0: train_weighted_split_splits ..................... [['0:1']] +0: train_weighted_split_weights .................... [['1.0']] +0: universal_checkpoint ............................ False +0: use_bnb_optimizer ............................... False +0: use_checkpoint_lr_scheduler ..................... False +0: use_contiguous_buffers_in_ddp ................... True +0: use_cpu_initialization .......................... None +0: use_one_sent_docs ............................... False +0: use_pin_memory .................................. False +0: valid_num_workers ............................... 2 +0: valid_weighted_split_names ...................... ['validation'] +0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] +0: valid_weighted_split_paths_path ................. None +0: valid_weighted_split_splits ..................... [['0:1']] +0: valid_weighted_split_weights .................... [['1.0']] +0: virtual_pipeline_model_parallel_size ............ None +0: vocab_extra_ids ................................. 0 +0: vocab_file ...................................... gpt2/vocab.json +0: weight_decay .................................... 0.1 +0: world_size ...................................... 64 +0: zero_allgather_bucket_size ...................... 
0.0 +0: zero_contigious_gradients ....................... False +0: zero_reduce_bucket_size ......................... 0.0 +0: zero_reduce_scatter ............................. False +0: zero_stage ...................................... 0 +0: -------------------- end of arguments --------------------- +0: setting number of micro-batches to constant 1 +0: > building GPT2BPETokenizer tokenizer ... +0: > padded vocab (size: 50257) with 47 dummy tokens (new size: 50304) +0: DeepSpeed general environment info: +0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] +0: torch version .................... 1.13.0+rocm5.2 +0: torch cuda version ............... None +0: torch hip version ................ 5.2.21151-afdc89f8 +0: nvcc version ..................... None +0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] +0: deepspeed info ................... 0.7.5, unknown, unknown +0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 +0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** +0: > initializing torch distributed ... +0: [2023-02-09 22:44:23,286] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +7: > setting tensorboard ... +0: > initializing tensor model parallel with size 1 +0: > initializing pipeline model parallel with size 1 +0: > setting random seeds to 1234 ... +0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 +0: > compiling dataset index builder ... +0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: make: Nothing to be done for 'default'. 
+0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: >>> done with dataset index builder. Compilation time: 0.098 seconds +0: > compiling and loading fused kernels ... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 102 +0: ninja: no work to do. 
+0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 67 +0: ninja: no work to do. 
+0: >>> done with compiling and loading fused kernels. Compilation time: 26.484 seconds +0: time to initialize megatron (seconds): 68.607 +0: [after megatron is initialized] datetime: 2023-02-09 22:44:52 +0: building GPT model ... +0: [2023-02-09 22:44:52,742] [INFO] [utils.py:827:see_memory_usage] Before Building Model +0: [2023-02-09 22:44:52,742] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB +0: [2023-02-09 22:44:52,742] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.21 GB, percent = 6.0% +0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None +0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=1, model=0): 1, ProcessCoord(pipe=0, data=2, model=0): 2, ProcessCoord(pipe=0, data=3, model=0): 3, ProcessCoord(pipe=0, data=4, model=0): 4, ProcessCoord(pipe=0, data=5, model=0): 5, ProcessCoord(pipe=0, data=6, model=0): 6, ProcessCoord(pipe=0, data=7, model=0): 7, ProcessCoord(pipe=0, data=8, model=0): 8, ProcessCoord(pipe=0, data=9, model=0): 9, ProcessCoord(pipe=0, data=10, model=0): 10, ProcessCoord(pipe=0, data=11, model=0): 11, ProcessCoord(pipe=0, data=12, model=0): 12, ProcessCoord(pipe=0, data=13, model=0): 13, ProcessCoord(pipe=0, data=14, model=0): 14, ProcessCoord(pipe=0, data=15, model=0): 15, ProcessCoord(pipe=0, data=16, model=0): 16, ProcessCoord(pipe=0, data=17, model=0): 17, ProcessCoord(pipe=0, data=18, model=0): 18, ProcessCoord(pipe=0, data=19, model=0): 19, ProcessCoord(pipe=0, data=20, model=0): 20, ProcessCoord(pipe=0, data=21, model=0): 21, ProcessCoord(pipe=0, data=22, model=0): 22, ProcessCoord(pi +0: pe=0, data=23, model=0): 23, ProcessCoord(pipe=0, data=24, model=0): 24, ProcessCoord(pipe=0, data=25, model=0): 25, ProcessCoord(pipe=0, data=26, model=0): 26, ProcessCoord(pipe=0, data=27, model=0): 27, ProcessCoord(pipe=0, data=28, model=0): 28, ProcessCoord(pipe=0, data=29, model=0): 29, ProcessCoord(pipe=0, data=30, model=0): 30, ProcessCoord(pipe=0, 
data=31, model=0): 31, ProcessCoord(pipe=0, data=32, model=0): 32, ProcessCoord(pipe=0, data=33, model=0): 33, ProcessCoord(pipe=0, data=34, model=0): 34, ProcessCoord(pipe=0, data=35, model=0): 35, ProcessCoord(pipe=0, data=36, model=0): 36, ProcessCoord(pipe=0, data=37, model=0): 37, ProcessCoord(pipe=0, data=38, model=0): 38, ProcessCoord(pipe=0, data=39, model=0): 39, ProcessCoord(pipe=0, data=40, model=0): 40, ProcessCoord(pipe=0, data=41, model=0): 41, ProcessCoord(pipe=0, data=42, model=0): 42, ProcessCoord(pipe=0, data=43, model=0): 43, ProcessCoord(pipe=0, data=44, model=0): 44, ProcessCoord(pipe=0, data=45, model=0): 45, ProcessCoord(pipe=0, data=4 +0: 6, model=0): 46, ProcessCoord(pipe=0, data=47, model=0): 47, ProcessCoord(pipe=0, data=48, model=0): 48, ProcessCoord(pipe=0, data=49, model=0): 49, ProcessCoord(pipe=0, data=50, model=0): 50, ProcessCoord(pipe=0, data=51, model=0): 51, ProcessCoord(pipe=0, data=52, model=0): 52, ProcessCoord(pipe=0, data=53, model=0): 53, ProcessCoord(pipe=0, data=54, model=0): 54, ProcessCoord(pipe=0, data=55, model=0): 55, ProcessCoord(pipe=0, data=56, model=0): 56, ProcessCoord(pipe=0, data=57, model=0): 57, ProcessCoord(pipe=0, data=58, model=0): 58, ProcessCoord(pipe=0, data=59, model=0): 59, ProcessCoord(pipe=0, data=60, model=0): 60, ProcessCoord(pipe=0, data=61, model=0): 61, ProcessCoord(pipe=0, data=62, model=0): 62, ProcessCoord(pipe=0, data=63, model=0): 63} +0: [2023-02-09 22:44:54,764] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer +0: stage=0 layers=22 +0: 0: _to_float16 +0: 1: EmbeddingPipe +0: 2: +0: 3: ParallelTransformerLayerPipe +0: 4: ParallelTransformerLayerPipe +0: 5: ParallelTransformerLayerPipe +0: 6: ParallelTransformerLayerPipe +0: 7: ParallelTransformerLayerPipe +0: 8: ParallelTransformerLayerPipe +0: 9: ParallelTransformerLayerPipe +0: 10: ParallelTransformerLayerPipe +0: 11: ParallelTransformerLayerPipe +0: 12: ParallelTransformerLayerPipe 
+0: 13: ParallelTransformerLayerPipe +0: 14: ParallelTransformerLayerPipe +0: 15: ParallelTransformerLayerPipe +0: 16: ParallelTransformerLayerPipe +0: 17: ParallelTransformerLayerPipe +0: 18: undo +0: 19: MixedFusedLayerNorm +0: 20: EmbeddingPipe +0: 21: float16_to_fp32 +0: loss: CrossEntropy +0: [2023-02-09 22:44:55,078] [INFO] [utils.py:827:see_memory_usage] After Building Model +0: [2023-02-09 22:44:55,079] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.28 GB CA 0.29 GB Max_CA 0 GB +0: [2023-02-09 22:44:55,079] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.23 GB, percent = 6.0% +0: setting training iterations to 0 +0: > learning rate decay style: cosine +0: DeepSpeed is enabled. +0: [2023-02-09 22:44:55,080] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +0: [2023-02-09 22:45:08,000] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +0: [2023-02-09 22:45:08,000] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +0: [2023-02-09 22:45:08,000] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +0: [2023-02-09 22:45:08,005] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +0: [2023-02-09 22:45:08,005] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +0: [2023-02-09 22:45:08,125] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer +0: [2023-02-09 22:45:08,126] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.29 GB CA 0.31 GB Max_CA 0 GB +0: [2023-02-09 22:45:08,126] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.91 GB, percent = 6.1% +0: ninja: no work to do. 
+0: Time to load utils op: 0.18507170677185059 seconds +4: Time to load utils op: 0.21067309379577637 seconds +7: Time to load utils op: 0.2083265781402588 seconds +0: Time to load utils op: 0.10260820388793945 seconds +0: Time to load utils op: 0.10209989547729492 seconds +0: Time to load utils op: 0.10195469856262207 seconds +0: Time to load utils op: 0.10199880599975586 seconds +0: Time to load utils op: 0.10212826728820801 seconds +0: Time to load utils op: 0.10213184356689453 seconds +0: Time to load utils op: 0.10228800773620605 seconds +0: Time to load utils op: 0.0006515979766845703 seconds +4: Time to load utils op: 0.10215020179748535 seconds +4: Time to load utils op: 0.1019585132598877 seconds +4: Time to load utils op: 0.10226917266845703 seconds +4: Time to load utils op: 0.10277819633483887 secondsTime to load utils op: 0.10182809829711914 seconds +4: +4: Time to load utils op: 0.10183167457580566 seconds +4: Time to load utils op: 0.10185718536376953 seconds +7: Time to load utils op: 0.10197710990905762 seconds +7: Time to load utils op: 0.10231637954711914 seconds +7: Time to load utils op: 0.10190653800964355 seconds +7: Time to load utils op: 0.10190415382385254 seconds +7: Time to load utils op: 0.10226941108703613 seconds +7: Time to load utils op: 0.10232424736022949 seconds +7: Time to load utils op: 0.10257530212402344 seconds +4: Time to load utils op: 0.00048613548278808594 seconds +3: Time to load utils op: 0.11252903938293457 secondsTime to load utils op: 0.11244654655456543 seconds +3: +3: Time to load utils op: 0.11257004737854004 seconds +3: Time to load utils op: 0.11257410049438477 seconds +3: Time to load utils op: 0.11327075958251953 seconds +3: Time to load utils op: 0.1126565933227539 secondsTime to load utils op: 0.11327886581420898 seconds +3: +3: Time to load utils op: 0.11302304267883301 seconds +2: Time to load utils op: 0.11104822158813477 secondsTime to load utils op: 0.11106419563293457 seconds +2: +2: Time to load 
utils op: 0.11103296279907227 seconds +2: Time to load utils op: 0.11108922958374023 seconds +2: Time to load utils op: 0.11110186576843262 seconds +2: Time to load utils op: 0.11111235618591309 secondsTime to load utils op: 0.11111593246459961 seconds +2: +2: Time to load utils op: 0.11112308502197266 seconds +0: Time to load utils op: 0.0004177093505859375 seconds +0: Time to load utils op: 0.0003833770751953125 seconds +1: Time to load utils op: 0.11172676086425781 seconds +1: Time to load utils op: 0.11173486709594727 seconds +1: Time to load utils op: 0.11176633834838867 seconds +1: Time to load utils op: 0.11178421974182129 secondsTime to load utils op: 0.11178183555603027 seconds +1: +1: Time to load utils op: 0.11179089546203613 seconds +1: Time to load utils op: 0.11179924011230469 secondsTime to load utils op: 0.11176371574401855 seconds +1: +0: Time to load utils op: 0.00051116943359375 seconds +0: Time to load utils op: 0.0004761219024658203 seconds +0: Time to load utils op: 0.00041866302490234375 seconds +0: Time to load utils op: 0.000385284423828125 seconds +5: Time to load utils op: 0.11084294319152832 secondsTime to load utils op: 0.11087393760681152 secondsTime to load utils op: 0.11087179183959961 seconds +5: +5: Time to load utils op: 0.11087894439697266 seconds +5: +5: Time to load utils op: 0.11088943481445312 seconds +5: Time to load utils op: 0.11089587211608887 secondsTime to load utils op: 0.11089682579040527 seconds +5: +5: Time to load utils op: 0.11085081100463867 seconds +6: Time to load utils op: 0.1098947525024414 seconds +6: Time to load utils op: 0.10990619659423828 secondsTime to load utils op: 0.10992217063903809 secondsTime to load utils op: 0.10992980003356934 secondsTime to load utils op: 0.10993361473083496 secondsTime to load utils op: 0.10993289947509766 secondsTime to load utils op: 0.10990262031555176 seconds +6: +6: +6: +6: +6: +6: Time to load utils op: 0.10994172096252441 seconds +4: Time to load utils op: 
0.00045680999755859375 seconds +4: Time to load utils op: 0.0003407001495361328 seconds +4: Time to load utils op: 0.0003428459167480469 seconds +4: Time to load utils op: 0.0003612041473388672 seconds +4: Time to load utils op: 0.00033545494079589844 seconds +4: Time to load utils op: 0.00033092498779296875 seconds +4: Time to load utils op: 0.0003361701965332031 seconds +0: [2023-02-09 22:45:08,341] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 +0: [2023-02-09 22:45:08,342] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.28 GB CA 0.31 GB Max_CA 0 GB +0: [2023-02-09 22:45:08,342] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.06 GB, percent = 6.2% +6: Time to load utils op: 0.0008547306060791016 seconds +7: Time to load utils op: 0.0004761219024658203 seconds +7: Time to load utils op: 0.0004024505615234375 secondsTime to load utils op: 0.0004134178161621094 seconds +7: +1: Time to load utils op: 0.0009660720825195312 seconds +1: Time to load utils op: 0.000982522964477539 seconds +1: Time to load utils op: 0.0009810924530029297 seconds +7: Time to load utils op: 0.0004928112030029297 seconds +7: Time to load utils op: 0.0005970001220703125 seconds +7: Time to load utils op: 0.00041675567626953125 seconds +7: Time to load utils op: 0.0005669593811035156 seconds +7: Time to load utils op: 0.0006632804870605469 seconds +5: Time to load utils op: 0.0013475418090820312 seconds +1: Time to load utils op: 0.0013782978057861328 seconds +1: Time to load utils op: 0.001384735107421875 seconds +1: Time to load utils op: 0.0013988018035888672 seconds +1: Time to load utils op: 0.001386880874633789 seconds +1: Time to load utils op: 0.0014336109161376953 seconds +5: Time to load utils op: 0.0016927719116210938 seconds +5: Time to load utils op: 0.0017316341400146484 seconds +5: Time to load utils op: 0.0018727779388427734 seconds +5: Time to load utils op: 0.0018126964569091797 seconds +6: Time to load utils op: 
0.0019216537475585938 seconds +5: Time to load utils op: 0.001804351806640625 secondsTime to load utils op: 0.0017979145050048828 seconds +5: +5: Time to load utils op: 0.0017542839050292969 seconds +6: Time to load utils op: 0.0019559860229492188 seconds +6: Time to load utils op: 0.0020134449005126953 seconds +6: Time to load utils op: 0.0020232200622558594 seconds +6: Time to load utils op: 0.002083301544189453 secondsTime to load utils op: 0.0020842552185058594 seconds +6: +6: Time to load utils op: 0.002111196517944336 seconds +3: Time to load utils op: 0.0007386207580566406 seconds +3: Time to load utils op: 0.0009355545043945312 seconds +3: Time to load utils op: 0.0009496212005615234 seconds +2: Time to load utils op: 0.0010271072387695312 seconds +2: Time to load utils op: 0.0013473033905029297 seconds +2: Time to load utils op: 0.0014531612396240234 seconds +3: Time to load utils op: 0.0015611648559570312 seconds +2: Time to load utils op: 0.0015635490417480469 seconds +3: Time to load utils op: 0.0014705657958984375 seconds +3: Time to load utils op: 0.0015931129455566406 seconds +2: Time to load utils op: 0.0014557838439941406 seconds +2: Time to load utils op: 0.0015163421630859375 secondsTime to load utils op: 0.001379251480102539 seconds +2: +2: Time to load utils op: 0.001461029052734375 seconds +3: Time to load utils op: 0.001619577407836914 seconds +3: Time to load utils op: 0.0016970634460449219 seconds +0: [2023-02-09 22:45:08,457] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 +0: [2023-02-09 22:45:08,458] [INFO] [utils.py:828:see_memory_usage] MA 0.62 GB Max_MA 0.62 GB CA 0.82 GB Max_CA 1 GB +0: [2023-02-09 22:45:08,458] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.06 GB, percent = 6.2% +0: [2023-02-09 22:45:08,562] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 +0: [2023-02-09 22:45:08,563] [INFO] [utils.py:828:see_memory_usage] MA 0.62 GB Max_MA 0.62 GB CA 0.82 GB Max_CA 1 GB +0: 
[2023-02-09 22:45:08,563] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.06 GB, percent = 6.2% +0: [2023-02-09 22:45:08,669] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 +0: [2023-02-09 22:45:08,669] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-02-09 22:45:08,669] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.06 GB, percent = 6.2% +0: [2023-02-09 22:45:08,773] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 +0: [2023-02-09 22:45:08,774] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-02-09 22:45:08,774] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.06 GB, percent = 6.2% +0: [2023-02-09 22:45:08,880] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 +0: [2023-02-09 22:45:08,881] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-02-09 22:45:08,881] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.06 GB, percent = 6.2% +0: [2023-02-09 22:45:08,984] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer +0: [2023-02-09 22:45:08,985] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-02-09 22:45:08,985] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.06 GB, percent = 6.2% +0: [2023-02-09 22:45:09,094] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer +0: [2023-02-09 22:45:09,095] [INFO] [utils.py:828:see_memory_usage] MA 0.85 GB Max_MA 0.85 GB CA 1.13 GB Max_CA 1 GB +0: [2023-02-09 22:45:09,095] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.06 GB, percent = 6.2% +0: [2023-02-09 22:45:09,199] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer +0: [2023-02-09 22:45:09,200] [INFO] [utils.py:828:see_memory_usage] MA 0.85 GB Max_MA 0.85 GB CA 1.13 GB Max_CA 1 GB 
+0: [2023-02-09 22:45:09,200] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.06 GB, percent = 6.2% +0: [2023-02-09 22:45:09,200] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +0: [2023-02-09 22:45:09,200] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler +0: [2023-02-09 22:45:09,200] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +0: [2023-02-09 22:45:09,200] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +0: [2023-02-09 22:45:09,201] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: +0: [2023-02-09 22:45:09,201] [INFO] [config.py:1011:print] activation_checkpointing_config { +0: "partition_activations": false, +0: "contiguous_memory_optimization": false, +0: "cpu_checkpointing": false, +0: "number_checkpoints": null, +0: "synchronize_checkpoint_boundary": false, +0: "profile": false +0: } +0: [2023-02-09 22:45:09,201] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +0: [2023-02-09 22:45:09,201] [INFO] [config.py:1011:print] amp_enabled .................. False +0: [2023-02-09 22:45:09,201] [INFO] [config.py:1011:print] amp_params ................... False +0: [2023-02-09 22:45:09,201] [INFO] [config.py:1011:print] autotuning_config ............ 
{ +0: "enabled": false, +0: "start_step": null, +0: "end_step": null, +0: "metric_path": null, +0: "arg_mappings": null, +0: "metric": "throughput", +0: "model_info": null, +0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", +0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", +0: "overwrite": true, +0: "fast": true, +0: "start_profile_step": 3, +0: "end_profile_step": 5, +0: "tuner_type": "gridsearch", +0: "tuner_early_stopping": 5, +0: "tuner_num_trials": 50, +0: "model_info_path": null, +0: "mp_size": 1, +0: "max_train_batch_size": null, +0: "min_train_batch_size": 1, +0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, +0: "min_train_micro_batch_size_per_gpu": 1, +0: "num_tuning_micro_batch_sizes": 3 +0: } +0: [2023-02-09 22:45:09,201] [INFO] [config.py:1011:print] bfloat16_enabled ............. True +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] comms_config ................. +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] communication_data_type ...... None +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] compression_config ........... 
{'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa +0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] curriculum_enabled ........... False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] curriculum_params ............ False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] dataloader_drop_last ......... False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] disable_allgather ............ False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] dump_state ................... False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ 
bert.encoder.layer +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] elasticity_enabled ........... False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] flops_profiler_config ........ { +0: "enabled": false, +0: "profile_step": 1, +0: "module_depth": -1, +0: "top_modules": 1, +0: "detailed": true, +0: "output_file": null +0: } +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] fp16_auto_cast ............... None +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] fp16_enabled ................. False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] global_rank .................. 0 +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 1 +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] load_universal_checkpoint .... False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] loss_scale ................... 1.0 +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] memory_breakdown ............. False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] monitor_config ............... 
+0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] nebula_config ................ { +0: "enabled": false, +0: "persistent_storage_path": null, +0: "persistent_time_interval": 100, +0: "num_of_version_in_retention": 2, +0: "enable_nebula_load": true, +0: "load_path": null +0: } +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] optimizer_name ............... None +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] optimizer_params ............. None +0: [2023-02-09 22:45:09,202] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] pld_enabled .................. False +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] pld_params ................... False +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] prescale_gradients ........... False +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] scheduler_name ............... None +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] scheduler_params ............. None +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] sparse_attention ............. None +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] steps_per_print .............. 2000 +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] train_batch_size ............. 256 +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 4 +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] use_node_local_storage ....... False +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] wall_clock_breakdown ......... 
False +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] world_size ................... 64 +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] zero_enabled ................. False +0: [2023-02-09 22:45:09,203] [INFO] [config.py:1011:print] zero_optimization_stage ...... 
0 +0: [2023-02-09 22:45:09,203] [INFO] [config.py:996:print_user_config] json = { +0: "train_micro_batch_size_per_gpu": 4, +0: "train_batch_size": 256, +0: "gradient_clipping": 1.0, +0: "zero_optimization": { +0: "stage": 0 +0: }, +0: "bf16": { +0: "enabled": true +0: }, +0: "steps_per_print": 2.000000e+03, +0: "wall_clock_breakdown": false +0: } +0: Time to load utils op: 0.0006086826324462891 seconds +0: [2023-02-09 22:45:09,204] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=1 micro_batch_size=4 +0: [2023-02-09 22:45:09,214] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=22 [0, 22) STAGE_PARAMS=146525952 (146.526M) TOTAL_PARAMS=146525952 (146.526M) UNIQUE_PARAMS=146525952 (146.526M) +7: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+6: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+0: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+1: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+4: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+2: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+3: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. 
+0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. 
+0: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. 
+6: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+6: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+5: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. 
+7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. 
+0: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. 
+4: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. 
+2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... 
+2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... 
+5: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... 
+1: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. 
+5: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. 
+5: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... 
+7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. 
+7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... 
+2: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. 
+1: [2023-02-09 22:45:09,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... 
+6: [2023-02-09 22:45:09,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... 
+6: [2023-02-09 22:45:09,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. 
+2: [2023-02-09 22:45:09,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. 
+2: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. 
+1: [2023-02-09 22:45:09,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,452] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+7: [2023-02-09 22:45:09,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,454] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. 
+3: [2023-02-09 22:45:09,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... 
+5: [2023-02-09 22:45:09,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... 
+1: [2023-02-09 22:45:09,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. 
+0: [2023-02-09 22:45:09,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+6: [2023-02-09 22:45:09,474] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... 
+0: [2023-02-09 22:45:09,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. 
+7: [2023-02-09 22:45:09,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. 
+3: [2023-02-09 22:45:09,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. 
+4: [2023-02-09 22:45:09,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. 
+4: [2023-02-09 22:45:09,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+7: [2023-02-09 22:45:09,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+2: [2023-02-09 22:45:09,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. 
+0: [2023-02-09 22:45:09,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. 
+6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+1: [2023-02-09 22:45:09,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+1: [2023-02-09 22:45:09,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,552] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. 
+6: [2023-02-09 22:45:09,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. 
+3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+6: [2023-02-09 22:45:09,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:09,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+6: [2023-02-09 22:45:09,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:09,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:09,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:09,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. 
+0: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+2: [2023-02-09 22:45:09,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:09,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. 
+0: [2023-02-09 22:45:09,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:09,572] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,572] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,575] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+0: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:09,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:09,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:09,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:09,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. 
+0: [2023-02-09 22:45:09,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:09,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. 
+4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,577] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. 
+5: [2023-02-09 22:45:09,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:09,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:09,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:09,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:09,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:09,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:09,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+0: [2023-02-09 22:45:09,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:09,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. 
+3: [2023-02-09 22:45:09,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:09,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,569] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:09,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. 
+7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+7: [2023-02-09 22:45:09,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:09,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:09,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. 
+7: [2023-02-09 22:45:09,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+2: [2023-02-09 22:45:09,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+2: [2023-02-09 22:45:09,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:09,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. 
+2: [2023-02-09 22:45:09,689] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:09,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,696] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. 
+4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. 
+5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+6: [2023-02-09 22:45:09,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:09,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:09,698] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,698] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,699] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:09,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+1: [2023-02-09 22:45:09,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,700] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:09,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. 
+0: [2023-02-09 22:45:09,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:09,701] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:09,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:09,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:09,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,702] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt... 
+5: [2023-02-09 22:45:09,702] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:09,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,703] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. 
+6: [2023-02-09 22:45:09,703] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. 
+5: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:09,704] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:09,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. 
+4: [2023-02-09 22:45:09,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:09,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:09,705] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,705] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,706] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:09,706] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... 
+6: [2023-02-09 22:45:09,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,707] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... 
+6: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,708] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. 
+0: [2023-02-09 22:45:09,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:09,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:09,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:09,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... 
+2: [2023-02-09 22:45:09,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:09,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:09,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:09,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:09,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:09,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... 
+2: [2023-02-09 22:45:09,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:09,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. 
+7: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:09,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:09,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. 
+2: [2023-02-09 22:45:09,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:09,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:09,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:09,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:09,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... 
+2: [2023-02-09 22:45:09,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:09,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:09,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:09,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... 
+7: [2023-02-09 22:45:09,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:09,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:09,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. 
+1: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... 
+1: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. 
+5: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. 
+5: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. 
+6: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. 
+1: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:09,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:09,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... 
+0: [2023-02-09 22:45:09,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:09,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:09,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. 
+1: [2023-02-09 22:45:09,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:09,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... 
+5: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. 
+3: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. 
+4: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:09,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:09,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:09,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:09,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:09,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:09,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:09,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:09,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:09,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:09,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... 
+6: [2023-02-09 22:45:09,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:09,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:09,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:09,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:09,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:09,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:09,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:09,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:09,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:09,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:09,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:09,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:09,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:09,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:09,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:09,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_05-model_00-model_states.pt. 
+0: [2023-02-09 22:45:09,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:09,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:09,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:09,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:09,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:09,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:09,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:09,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:09,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:09,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. 
+1: [2023-02-09 22:45:09,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:09,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:09,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:09,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:09,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:09,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:10,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:10,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:10,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:10,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:10,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:10,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:10,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:10,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:10,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:10,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:10,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:10,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:10,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:10,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:10,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:10,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:10,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:10,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:10,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:10,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:10,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:10,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:10,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:10,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,011] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:10,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:10,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:10,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:10,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:10,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:10,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:10,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:10,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:10,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:10,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:10,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:10,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:10,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:10,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:10,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:10,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,021] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,022] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:10,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,024] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:10,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:10,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:10,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:10,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:10,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:10,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. 
+6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:10,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:10,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:10,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:10,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:10,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:10,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:10,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:10,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:10,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:10,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:10,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:10,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:10,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:10,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:10,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:10,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:10,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. 
+6: [2023-02-09 22:45:10,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:10,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:10,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:10,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:10,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:10,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:10,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:10,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:10,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:10,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:10,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:10,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:10,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:10,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:10,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:10,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:10,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:10,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+6: [2023-02-09 22:45:10,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:10,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:10,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:10,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,242] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:10,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:10,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:10,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:10,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:10,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:10,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:10,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:10,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:10,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:10,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:10,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:10,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:10,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:10,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:10,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:10,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:10,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:10,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:10,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:10,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:10,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:10,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:10,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:10,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:10,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:10,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:10,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. 
+6: [2023-02-09 22:45:10,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:10,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,424] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:10,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:10,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:10,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:10,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:10,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:10,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:10,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:10,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:10,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:10,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:10,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:10,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:10,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:10,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:10,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:10,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:10,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:10,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:10,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:10,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. 
+6: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:10,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:10,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:10,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:10,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:10,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:10,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:10,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:10,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:10,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:10,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:10,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:10,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:10,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:10,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:10,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:10,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:10,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:10,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:10,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:10,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:10,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:10,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:10,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:10,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:10,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:10,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:10,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:10,597] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:10,597] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:10,597] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:10,597] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:10,598] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:10,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. 
+6: [2023-02-09 22:45:10,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:10,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:10,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:10,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:10,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. 
+6: [2023-02-09 22:45:10,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:10,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. 
+6: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:10,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:10,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:10,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:10,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:10,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:10,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:10,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+6: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:10,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:10,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:10,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:10,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:10,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:10,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:10,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:10,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:10,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:10,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:10,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:10,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:10,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:10,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:10,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:10,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:10,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:10,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:10,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:10,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:10,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:10,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:10,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:10,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:10,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:10,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:10,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:10,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:10,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:10,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:10,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:10,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:10,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:10,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:10,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:10,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:10,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_14-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,863] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,864] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,867] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:10,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:10,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:10,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:10,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:10,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:10,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:10,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:10,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:10,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:10,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:10,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:10,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. 
+1: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:10,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:10,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:10,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:10,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:10,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:10,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:10,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,880] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:10,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:10,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,883] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,884] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. 
+0: [2023-02-09 22:45:10,884] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:10,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:10,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,891] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:10,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:10,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:10,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... 
+7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... 
+5: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. 
+5: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... 
+0: [2023-02-09 22:45:10,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:10,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:10,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. 
+3: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:10,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:10,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. 
+7: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+1: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:10,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:10,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. 
+4: [2023-02-09 22:45:10,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:10,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:10,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:10,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:10,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:10,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:10,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:10,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:10,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:10,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:10,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:10,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:10,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:10,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:10,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+6: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:10,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:10,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+3: [2023-02-09 22:45:10,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:10,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:10,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:10,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:10,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:10,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:10,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+4: [2023-02-09 22:45:10,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:10,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:10,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:10,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:10,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:10,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:10,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:10,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:10,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:10,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. 
+2: [2023-02-09 22:45:10,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:10,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:10,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:10,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+2: [2023-02-09 22:45:10,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:10,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:10,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:10,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:11,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:11,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:11,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:11,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:11,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:11,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:11,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:11,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:11,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:11,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:11,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:11,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:11,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:11,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:11,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:11,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:11,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:11,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:11,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:11,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:11,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:11,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:11,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:11,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:11,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:11,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:11,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:11,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:11,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... 
+7: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: > overriding learning rate value to 0.0002 +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: > overriding minimum learning rate value to 2e-05 +0: > overriding warmup iterations value to 0 +0: > overriding total number of iterations value to 1 +0: > overriding decay style value to cosine +3: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:11,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:11,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:11,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:11,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:11,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:11,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:11,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:11,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:11,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... 
+2: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:11,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... 
+4: [2023-02-09 22:45:11,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:11,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:11,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:11,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:11,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:11,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:11,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:11,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:11,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:11,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... 
+5: [2023-02-09 22:45:11,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:11,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:11,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:11,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:11,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:11,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:11,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:11,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:11,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:11,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... 
+6: [2023-02-09 22:45:11,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:11,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:11,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:11,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:11,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:11,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:11,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:11,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:11,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:11,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... 
+3: [2023-02-09 22:45:11,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:11,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:11,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:11,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... 
+1: [2023-02-09 22:45:11,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:11,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:11,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:11,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:11,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:11,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:11,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:11,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. 
+2: [2023-02-09 22:45:11,228] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 21 +2: [2023-02-09 22:45:11,230] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 21 +0: [2023-02-09 22:45:11,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:11,232] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 7 +0: [2023-02-09 22:45:11,234] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 7 +7: [2023-02-09 22:45:11,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:11,236] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 61 +7: [2023-02-09 22:45:11,237] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 61 +1: [2023-02-09 22:45:11,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:45:11,240] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 12 +1: [2023-02-09 22:45:11,242] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 12 +2: [2023-02-09 22:45:11,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. 
+2: [2023-02-09 22:45:11,242] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 22 +2: [2023-02-09 22:45:11,244] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 22 +4: [2023-02-09 22:45:11,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:11,246] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 39 +4: [2023-02-09 22:45:11,247] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 39 +3: [2023-02-09 22:45:11,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:45:11,248] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 30 +7: [2023-02-09 22:45:11,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:11,249] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 58 +6: [2023-02-09 22:45:11,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. 
+6: [2023-02-09 22:45:11,249] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 55 +3: [2023-02-09 22:45:11,250] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 30 +7: [2023-02-09 22:45:11,251] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 58 +6: [2023-02-09 22:45:11,251] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 55 +3: [2023-02-09 22:45:11,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:45:11,256] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 25 +7: [2023-02-09 22:45:11,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:11,257] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 57 +3: [2023-02-09 22:45:11,258] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 25 +7: [2023-02-09 22:45:11,258] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 57 +0: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:11,259] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 2 +5: [2023-02-09 22:45:11,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. 
+0: [2023-02-09 22:45:11,261] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 2 +5: [2023-02-09 22:45:11,261] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 41 +5: [2023-02-09 22:45:11,263] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 41 +4: [2023-02-09 22:45:11,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:11,266] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 33 +4: [2023-02-09 22:45:11,267] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 33 +0: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:11,268] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 4 +0: [2023-02-09 22:45:11,269] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 4 +0: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:11,270] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 0 +5: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. 
+5: [2023-02-09 22:45:11,271] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 45 +0: [2023-02-09 22:45:11,272] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 0 +5: [2023-02-09 22:45:11,272] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 45 +0: could not find arguments in the checkpoint ... +0: checkpoint version 3.0 +6: [2023-02-09 22:45:11,274] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:45:11,274] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 49 +2: [2023-02-09 22:45:11,275] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:45:11,275] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 20 +6: [2023-02-09 22:45:11,276] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 49 +1: [2023-02-09 22:45:11,276] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. 
+1: [2023-02-09 22:45:11,276] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 9 +2: [2023-02-09 22:45:11,277] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 20 +1: [2023-02-09 22:45:11,278] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 9 +1: [2023-02-09 22:45:11,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:45:11,279] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 14 +0: [2023-02-09 22:45:11,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:45:11,280] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 14 +4: [2023-02-09 22:45:11,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:11,280] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 5 +4: [2023-02-09 22:45:11,280] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 36 +1: [2023-02-09 22:45:11,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. 
+1: [2023-02-09 22:45:11,281] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 11 +0: [2023-02-09 22:45:11,282] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 5 +4: [2023-02-09 22:45:11,282] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 36 +4: [2023-02-09 22:45:11,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:11,283] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 32 +1: [2023-02-09 22:45:11,283] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 11 +1: [2023-02-09 22:45:11,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:45:11,284] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 13 +6: [2023-02-09 22:45:11,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. 
+4: [2023-02-09 22:45:11,284] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 32 +6: [2023-02-09 22:45:11,285] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 51 +1: [2023-02-09 22:45:11,285] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 13 +6: [2023-02-09 22:45:11,286] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 51 +3: [2023-02-09 22:45:11,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:45:11,286] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 26 +3: [2023-02-09 22:45:11,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:45:11,287] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 31 +7: [2023-02-09 22:45:11,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:11,287] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 60 +3: [2023-02-09 22:45:11,288] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 26 +7: [2023-02-09 22:45:11,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. 
+7: [2023-02-09 22:45:11,288] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 62 +3: [2023-02-09 22:45:11,289] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 31 +7: [2023-02-09 22:45:11,289] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 60 +5: [2023-02-09 22:45:11,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:45:11,289] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 47 +7: [2023-02-09 22:45:11,290] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 62 +5: [2023-02-09 22:45:11,291] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 47 +0: [2023-02-09 22:45:11,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:11,291] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 6 +3: [2023-02-09 22:45:11,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. 
+3: [2023-02-09 22:45:11,292] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 24 +3: [2023-02-09 22:45:11,293] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 24 +0: [2023-02-09 22:45:11,293] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 6 +0: [2023-02-09 22:45:11,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:11,294] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 1 +4: [2023-02-09 22:45:11,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:11,295] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 37 +0: [2023-02-09 22:45:11,296] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 1 +4: [2023-02-09 22:45:11,297] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 37 +0: [2023-02-09 22:45:11,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:45:11,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:45:11,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. 
+0: [2023-02-09 22:45:11,298] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 3 +2: [2023-02-09 22:45:11,298] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 17 +2: [2023-02-09 22:45:11,298] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 16 +6: [2023-02-09 22:45:11,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:45:11,299] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 53 +0: [2023-02-09 22:45:11,300] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 3 +2: [2023-02-09 22:45:11,300] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 17 +2: [2023-02-09 22:45:11,300] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 16 +4: [2023-02-09 22:45:11,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:11,300] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 38 +6: [2023-02-09 22:45:11,301] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 53 +4: [2023-02-09 22:45:11,302] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 38 +6: [2023-02-09 22:45:11,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. 
+6: [2023-02-09 22:45:11,302] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 50 +4: [2023-02-09 22:45:11,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:45:11,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:11,303] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 35 +5: [2023-02-09 22:45:11,303] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 44 +6: [2023-02-09 22:45:11,304] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 50 +5: [2023-02-09 22:45:11,304] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 44 +4: [2023-02-09 22:45:11,304] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 35 +2: [2023-02-09 22:45:11,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:45:11,307] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 23 +3: [2023-02-09 22:45:11,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. 
+3: [2023-02-09 22:45:11,307] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 27 +2: [2023-02-09 22:45:11,309] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 23 +3: [2023-02-09 22:45:11,309] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 27 +7: [2023-02-09 22:45:11,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:11,310] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 56 +7: [2023-02-09 22:45:11,311] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 56 +7: [2023-02-09 22:45:11,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:11,324] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 63 +7: [2023-02-09 22:45:11,326] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 63 +1: [2023-02-09 22:45:11,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:45:11,327] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 8 +1: [2023-02-09 22:45:11,329] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 8 +5: [2023-02-09 22:45:11,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. 
+5: [2023-02-09 22:45:11,331] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 40 +5: [2023-02-09 22:45:11,333] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 40 +7: [2023-02-09 22:45:11,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:11,333] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 59 +3: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:45:11,334] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 29 +7: [2023-02-09 22:45:11,335] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 59 +3: [2023-02-09 22:45:11,335] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 29 +4: [2023-02-09 22:45:11,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:11,336] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 34 +6: [2023-02-09 22:45:11,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. 
+6: [2023-02-09 22:45:11,336] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 52 +4: [2023-02-09 22:45:11,337] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 34 +6: [2023-02-09 22:45:11,339] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 52 +5: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:45:11,341] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 42 +6: [2023-02-09 22:45:11,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:45:11,341] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 48 +5: [2023-02-09 22:45:11,342] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 42 +6: [2023-02-09 22:45:11,342] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 48 +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:45:11,343] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 46 +2: [2023-02-09 22:45:11,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. 
+2: [2023-02-09 22:45:11,344] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 18 +5: [2023-02-09 22:45:11,345] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 46 +2: [2023-02-09 22:45:11,346] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 18 +2: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:45:11,348] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 19 +2: [2023-02-09 22:45:11,349] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 19 +5: [2023-02-09 22:45:11,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:45:11,356] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 43 +5: [2023-02-09 22:45:11,357] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 43 +1: [2023-02-09 22:45:11,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:45:11,363] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 10 +1: [2023-02-09 22:45:11,364] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 10 +1: [2023-02-09 22:45:11,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. 
+1: [2023-02-09 22:45:11,374] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 15 +1: [2023-02-09 22:45:11,376] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 15 +6: [2023-02-09 22:45:11,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:45:11,384] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 54 +6: [2023-02-09 22:45:11,385] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 54 +3: [2023-02-09 22:45:11,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b14b/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:45:11,388] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 28 +3: [2023-02-09 22:45:11,389] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 28 +0: successfully loaded checkpoint from checkpoints_146m14b14b at iteration 0 +7: time (ms) | load-checkpoint: 2179.79 +0: estimated model parameters: 0.146525952 +0: estimated model parameters without embeddings: 0.106319616 +0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-02-09 22:45:11 +0: > building train, validation, and test datasets ... +0: > datasets target sizes (minimum size): +0: train: 1 +0: validation: 25600 +0: test: 25600 +0: > building train, validation, and test datasets for GPT ... +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... 
+0: > finished creating indexed dataset in 0.032006 seconds +0: number of documents: 28730568 +0: > dataset split: +0: train: +0: document indices in [0, 28730568) total of 28730568 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.088 seconds +0: total number of samples: 6713794 +0: total number of epochs: 1 +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.047670 seconds +0: number of documents: 364608 +0: > dataset split: +0: validation: +0: document indices in [0, 364608) total of 364608 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.057 seconds +0: total number of samples: 84978 +0: total number of epochs: 1 +0: > finished creating GPT datasets ... +0: [after dataloaders are built] datetime: 2023-02-09 22:45:24 +0: done with setup ... +0: training ... 
+7: time (ms) | model-and-optimizer-setup: 18976.50 | train/valid/test-data-iterators-setup: 13021.32 +0: [after training is done] datetime: 2023-02-09 22:45:24 +7: ----------------------------------------------------------------------------------------------------------------- +7: validation loss at the end of training for val data | lm loss value: 3.319729E+00 | lm loss PPL: 2.765284E+01 | +7: ----------------------------------------------------------------------------------------------------------------- +END 2820868: Thu 09 Feb 2023 10:45:46 PM EET diff --git a/146m14b14b/sbatch_146m14b14b.sh b/146m14b14b/sbatch_146m14b14b.sh new file mode 100644 index 0000000000000000000000000000000000000000..ecc1fa389c239a3ea6f8103201681eb820e11ac2 --- /dev/null +++ b/146m14b14b/sbatch_146m14b14b.sh @@ -0,0 +1,164 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=146m14b14b + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT +# Start from scratch +rm -rf "$CHECKPOINT_PATH" "$TENSORBOARD_PATH" + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +# DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train14b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 
/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_140M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 11300000000 +# -> Samples: 5517578 +TRAIN_SAMPLES=5_517_578 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 55_176 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": 
$MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/146m14b14b/sbatch_146m14b14bval.sh b/146m14b14b/sbatch_146m14b14bval.sh new file mode 100644 index 0000000000000000000000000000000000000000..39b3024301159a4b16d7716fc0337b89a74da427 --- /dev/null +++ b/146m14b14b/sbatch_146m14b14bval.sh @@ -0,0 +1,167 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=146m14b14bval +VARIANT_CKPT=146m14b14b + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT_CKPT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +# 
DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train14b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_140M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 11300000000 +# -> Samples: 5517578 +TRAIN_SAMPLES=1 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 0 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + --no-load-optim \ + --reset-progress \ + --override-lr-scheduler \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1 \ + --eval-iters 100 \ + 
--eval-only true \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/146m14b14b/tensorboard_146m14b14b/events.out.tfevents.1675875178.nid005210.18462.0 b/146m14b14b/tensorboard_146m14b14b/events.out.tfevents.1675875178.nid005210.18462.0 new file mode 100644 index 0000000000000000000000000000000000000000..5ba31ddd1adb783ef4ae3230d9e95c45f07ce60d --- /dev/null +++ b/146m14b14b/tensorboard_146m14b14b/events.out.tfevents.1675875178.nid005210.18462.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd8ad9034d52e7a465ef7c71241d7c2304f75c21453939904aa430846f4ceed +size 38441517 diff --git a/146m14b14b/tensorboard_146m14b14bval/events.out.tfevents.1675555841.nid005497.124955.0 b/146m14b14b/tensorboard_146m14b14bval/events.out.tfevents.1675555841.nid005497.124955.0 new file mode 100644 index 
0000000000000000000000000000000000000000..965afa12aac559492fa9a478f7e3a9dd8bf80b8f --- /dev/null +++ b/146m14b14b/tensorboard_146m14b14bval/events.out.tfevents.1675555841.nid005497.124955.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f510a40390b1bfabb8d94fac82399df69daee0c0053f41faadb57edd6dca1b +size 980 diff --git a/146m14b14b/tensorboard_146m14b14bval/events.out.tfevents.1675975463.nid006912.124025.0 b/146m14b14b/tensorboard_146m14b14bval/events.out.tfevents.1675975463.nid006912.124025.0 new file mode 100644 index 0000000000000000000000000000000000000000..8ec863cd20cfc09ebaa486e5546cf147327cbbec --- /dev/null +++ b/146m14b14b/tensorboard_146m14b14bval/events.out.tfevents.1675975463.nid006912.124025.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5278c60381d94bf167c2244da2371ebd26e0671041ff680bd756891933e43e7a +size 980 diff --git a/146m14b14b/transformers/config.json b/146m14b14b/transformers/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639868e787b6f7945a258cf690dc9b5dba7be4a6 --- /dev/null +++ b/146m14b14b/transformers/config.json @@ -0,0 +1 @@ +{"vocab_size": 50304, "n_positions": 2048, "n_embd": 768, "n_layer": 15, "n_head": 12, "n_inner": 3072, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, 
"cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": false, "model_type": "gpt2"} \ No newline at end of file diff --git a/146m14b14b/transformers/pytorch_model.bin b/146m14b14b/transformers/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a48aed4b0ae7a6372ead377ad949c7b2d52ca96d --- /dev/null +++ b/146m14b14b/transformers/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fb54021e12eda1b630ef8f149ca1374a383a7514bafdfc63a63a65197a6ff03 +size 418947557 diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_0.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..0089cf5506de26614ec8c15a63c55f71815dc608 --- /dev/null +++ 
b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.23336285971913792, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.026831547216034552}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05439865962968967, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011092434568872975}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.24374100837448018, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004154005834452381}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08446571459891085, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015824965912950772}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.02550841626771464, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006691581780940698}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.12014632144828626, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002861946551453886}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03996425304915668, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000989713007421649}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.0531129917083209, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010634389473944373}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.23984830582926353, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004111595583702724}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08265416866191247, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015305727288322003}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.052377103781293156, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010589109148837366}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.23522556317101853, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003942690776656362}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08134464486582038, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015093395667322115}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_1.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..bcd504b37e252e77df00e7fc06364b3e73f3baa3 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.149999320100551, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.01648600857868501}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05039389233407519, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0010678479734785133}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.23911605188763663, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004217097544169996}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.0783024499591439, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015115599781434076}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.022689565244003423, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006492234607511071}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10804796123371856, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002772425440915182}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03533421218479914, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009441723878638388}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.049215318671431786, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010308225696802156}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.23439123752368696, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.004133972873584638}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.0765840417258607, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014686631424978348}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.048623672915347575, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010273102967872621}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.22915163434854377, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00394130010753112}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07550976829415487, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014525784885102127}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_2.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..6c5a0614063b44b344a90faa607a734d5703c91f --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.13305791271479062, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.011210726963048447}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.04874142647590828, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001024411277927112}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.23418663847997345, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0038675109072007405}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07604718629625555, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014484478235269312}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.02178125656158737, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006229612925274444}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10462775115881133, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0026242942688188046}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.034017897932799156, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009135473678299833}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.047663521069835214, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0009922323810451167}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.22896434652678516, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003766547321885562}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07445958002040497, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.00141287468923303}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04730394599720064, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.000995041687823415}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.22603927988076686, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036652559910908635}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.0737768400907496, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014096194296899693}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_3.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..aa44f46bde60ede3a708aef671029517cfd6f711 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.10368475460486865, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.011060778041099317}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.048322862828509484, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001091294602922774}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2281459683846524, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003630469769192213}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07485950515286233, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014144928012380664}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.021765575511902755, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007376999524486791}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10112935517460186, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0025273038118034915}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.033414173983495055, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008989911483642444}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.047310277561040476, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010701645225700237}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.22271748235416333, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0035317693762082583}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.0733042353653659, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013877940713480257}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04700153630836809, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010655951813045416}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.22158687380299466, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0035274636245185057}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07279776055251803, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013786706937308762}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_4.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..fc0cc949c6579b48b3fab2ce6431ef8554df874b --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.12643210930285223, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.017689451259510113}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.048546496122212304, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0010917543766011267}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.23329777643622032, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.00377927308236942}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07550620564282749, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014275424158662932}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.021812470526869524, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007410163805045418}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10384870030998751, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002559034716018191}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03361031713979665, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009094774573230576}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.047406756420334914, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010669755185219207}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.22692062870092394, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003637107260174285}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.0737633812147375, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001396840481575515}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04722034321153461, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010651409333852174}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.22637726105546893, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036450661129995517}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07343239839440881, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001390175033514176}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_5.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..b3bc0d894d6f859074d405c77c1df4b189b4e12a --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.14580784113904285, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02169837835551737}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.04858688695146614, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0009919570128492023}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.23936897854801265, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003809570838617069}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07635558897011635, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014067473320664224}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.021403300583302882, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005992879742971646}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10635401405034506, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0026320301985926294}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.0336482487712469, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008809384464421846}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.04743074568047055, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0009628261081974606}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.23310511154456653, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003667912499413062}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07457995007202929, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013717292986929117}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04717939159930382, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0009611612661616537}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.23214615779014944, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036745231846563382}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07413039496102994, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013645157195961873}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_0.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..227cda1ce76cc8fa40781e5a44c4ea2ec1ab9218 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.09363473178880811, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016796809875309112}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.14418256512962083, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002167398392749155}, {"task_name": 
"GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.10422675836754251, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001600341131106392}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.012160392139666808, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005105463730603428}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.019121459317877912, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0008232714019808441}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.013569083806173431, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000536933125563704}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0820107922443091, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013749923256347872}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.12893888139963622, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0018862133214712646}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.09199729526410924, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013200459901581483}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.08652832387527064, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015294411673741832}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.1344951570087262, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0020253753831603227}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.09659833472928117, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": 
"en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014598821948214504}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.5430730295333313, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0365815466590515}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_1.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..73299c870376f644cf9247656bb3faad17bd0022 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.113864402018691, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015404538134881224}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": 
"tldr_en", "rouge1_recall": 0.1838117919523142, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0023514998715679107}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.12944623984650427, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015508995745003894}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.012715360202877583, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0004729696535287597}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.022779782580969615, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0010072688122998766}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.014839503603430174, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, 
"comment": "", "rouge2_fmeasure_stderr": 0.0005401931668179484}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.08328579570968432, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010324983906124013}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1380185552697111, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001751171256108266}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.09507660161625574, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0010200141214599331}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.10740807503883001, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014382706335826095}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.17373508082119807, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": 
"d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022092321023785564}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.12204763022142331, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014376576746715666}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.7631030821901151, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03637126016426104}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_2.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..9af58f21bf5fbfa124cc48b1d045dc39a12505ea --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 
0.11393029559700361, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016763730882923063}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1769694736700524, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0023424150386553723}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.125968585192418, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001567482533109581}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.01301367400947571, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005143488258905196}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.021873142006066014, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", 
"rouge2_recall_stderr": 0.000934130847859976}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.014779019287405309, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005575381148601111}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0874972075486996, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001231137532979079}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.13892758321292495, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001814780335285223}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.09713467955259053, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0011252807281427579}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.10695613459471714, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": 
"d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0015471539643745647}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.16645890967746207, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00217354070441736}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.11827617802959285, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014395913434439003}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.745302789030922, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.041165212986491714}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git 
a/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_3.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..2861e1fdb69d82930650208e9fb937243d9b3747 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.10517912682002659, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0020169944192517456}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.14986203056954372, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002553781810198287}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.1080907737092221, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0017189902209006306}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.01411384967174985, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005796594561779423}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.022758512637964766, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0010268466428699436}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.015116060561354339, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005706720135531178}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.08500267190413809, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0016364237998568027}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.12241576028305143, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020787177316311327}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08717745912804374, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": 
null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013335661136481402}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.09826602680946438, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018880997129709805}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.13990265269905022, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023636459311459136}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.10075080145793731, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015788401832700393}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.9375524807033079, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.05076091375311075}], "config": {"model": "hf-causal", "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_4.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b65dc642c3596c5a264954361d6320cb90569888 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.04021891620427611, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.00179527122100358}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.05458669351809771, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0021686949760662015}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.038297790163100025, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001450936870070239}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.007175083221520411, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006861959426040823}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.010377039775321539, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0008398548385378956}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.006564722152989603, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00044826882706775823}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.03365949856690121, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001518451427611522}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.046270525077678676, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", 
"subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0018467375730547731}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.03193814381233648, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0011842243614109398}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.03743305339041787, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016843420360592007}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.0506510365437872, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002014009342396866}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.03543698761206124, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013366330300040165}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", 
"bleu": 0.2455685388768164, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.023312196201743386}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_5.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..9f866241cde4c4c0f920f8d18933eae5526e9118 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.00666688354748637, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0007557736630150564}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.009539134461787335, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": 
true, "comment": "", "rouge1_recall_stderr": 0.0010266547706396002}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.006452133253476872, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006426391950933609}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.001389249823684832, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0003303542509473268}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.002107899216680717, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.000406297855701019}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.001256549559807457, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00021119936913661453}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0055221266380235945, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": 
"d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0006252483839771813}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.00798094708260653, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0008641648642621242}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.005351282375182499, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0005270601162356447}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.006186563146749757, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0007119271694803169}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.008778707261151484, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0009486499053541579}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 
0.0059470818044703435, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005945085971730911}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 5.018187832788465e-07, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 9.001238569701927e-07}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..081fc6eb36d83da7fa5428052a186151aa509362 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for 
feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.0, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..9aca259bcf3de96765900e02dcf2ec6365c734c8 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 4.009859551325065, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.13278046275335678}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.3578229387593085, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003856721380827842}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.25948739086059736, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002640226000794725}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.2740595010030775, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002385396193975501}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.13054044528410688, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0037429245675078405}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.07406180042426694, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014501941041824516}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.0790458842297352, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001445148755996047}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.2735828328272949, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0036193449495243797}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.1900509586663965, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001986984907305974}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2018458460713563, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018101367404328157}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.30371799193520876, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003752209403616017}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.21475934437687166, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022815651046262236}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.22766636955410458, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002095153498100042}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..117740b97173e9d8f3173b4c1428056ca3800368 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 4.7806427294993, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1310832353636796}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4023615733786828, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0050997654220666}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.2604859328757677, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003124610081613822}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.2720615139154822, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0027687783144970625}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.20471652877426683, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0053699474888414716}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.08914356908095893, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017253335935271583}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.09494958872823868, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0016238555093460345}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.324312305309799, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.005059911825690109}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.19325765142384435, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002374927639383697}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.20383717565040163, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002104411024247141}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.35265257546958945, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.005084624220690631}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.21792408382120357, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002710638175002805}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.2287941438454128, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0024025488670924725}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..7143ec525bca19c073f32ebb681a2b8321026c05 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 5.379158787794686, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.17424945572147568}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.4200250914129759, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0053484767449253675}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.2597005038015286, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0031776678746421685}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.2739605028454004, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0028626358837710963}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2324625707379104, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.005656196308354222}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.09787654675007354, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00178618058095944}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.10541398390697426, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0017164566544297254}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.3452442707098063, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.005326530824347986}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.19634967424253236, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002444802090072814}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.20932493009842154, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002213566067169264}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3730608496140118, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0053445330283940886}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.21983351464384387, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027657542997588506}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.23343576076206798, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0025054329416814864}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b5e5a8b54c4902585e8d14148589bc970c4a778f --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 5.84362787810031, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1914395641675779}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.44142915899786084, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005252644410011952}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.27293406189445324, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003231606561873733}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.28912203831106437, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0028684342474448}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.24580187764860698, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.005582820891133567}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.10804855284127729, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018386317913512528}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.11600779243298241, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0017448325124731262}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.36278920910440526, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.005259330590831443}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.20655546999122476, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024658407342331543}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.22136695075479842, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.002213265282685222}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.39113354412345547, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.005259888245133616}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.23109513502236428, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0028128632311166086}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.24635463422951365, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002518259186545489}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..abbfb26bce765044420f0f7fabefdff9ac29bc11 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 6.294657081635081, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.1191423611800128}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.45758123798499606, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005027609714045855}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.2870117768804813, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0031304392663814677}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.30818385424543937, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002812315846665837}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.2503889688775632, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00532362565564733}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.11655912503614715, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018305152372365434}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.1273300920831121, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0017849338112142722}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.37623432283720026, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.005027601070585646}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.22058281726854637, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0024416343969262534}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.23900514298098396, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0022097628891594285}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.40434981140634907, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.005043898533403243}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.24370773833649576, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002739699465790519}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.2631784707358199, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002487054103176312}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_0.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..0bd5e1b8a738f833fb518ccd787d07585a4329a4 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.09869862910594125, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015754263415516496}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2425997427212717, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0035498618411088337}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.13822546470552896, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020817271615335853}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.016787743878533688, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007005319097605839}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.0425780543002491, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0017220443443167939}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.023697282994221247, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009611791067872703}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.08626521539341878, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012705285495719658}, {"task_name": "gem_xsum", "prompt_name": 
"article_DOC_summary", "rougeL_recall": 0.21327598743091675, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0029459890547047195}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.12098743849059979, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001680519826816699}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.07499548048418175, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011973587779807702}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.18673351936586377, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002877930418991892}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.1053525696001833, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, 
"comment": "", "rougeLsum_fmeasure_stderr": 0.001600085618320443}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.7848159984641091, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0846489540307912}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_1.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..c766c7e5adb62114d6b8614522c7685f3a533522 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.09124145037680485, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014659607841345699}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.22757570030500165, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0034586902105503583}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.12866565250951, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019918374752407302}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.012967504798466119, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006125196007516605}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.03365509386476815, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015985879929148613}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.018497303697898898, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008688724913792904}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07973413334544867, 
"fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012126850079835258}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.1999503525963445, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0029346063767394658}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11258738658901131, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001654534085681258}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.07040045782230749, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011336127776379584}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.17735154114603943, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002793790877961365}, 
{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09952018094863402, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015543045742524805}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.6629456628355466, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08774497677853796}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_2.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..2167925dad93cdd9754382c3c7c20c54878e3c87 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.09013053440861082, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: 
{{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014220128061208542}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2273610167464681, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0034008282640079156}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1275054327542126, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001936094417525703}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.013685249717276669, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006153358129211847}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.03651263650932684, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016893088610047168}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.019662620850184154, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, 
"subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008838457269276937}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.08023430784844289, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011971788003166785}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.20334042051960866, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0029270688857344516}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11364996341359528, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016363233390032223}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06885212236096724, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010822260185504432}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 
0.17558641489646157, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027535532125502033}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09766604816138601, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001494448373327452}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.6581479118355196, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07454636170059836}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_3.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..8fb6341a48a8fcb0317801adafe1aa960f30b0bc --- /dev/null +++ 
b/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08887731112301472, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016693801787118184}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2137238957435145, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0035351437827021116}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.12223643758741752, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002043267213282596}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.01336910380993121, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006644057446671918}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.034200003110033475, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001715535153457774}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.018803041332499517, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009204376153200208}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07951277816840352, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013813570731518456}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.1929746396810042, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0031195403408695475}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.10976787111848983, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001747164342136605}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0692667028628289, 
"fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012719926110399762}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.16841312507750444, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0028632604489444353}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09547379870346774, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015781898824613876}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.6487530237798568, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07015562797768161}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No 
newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_4.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..46c50b7c3a06b0341ecb9cf6229f018962b6c9b3 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.030778909853395358, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0020706771099415055}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.05446169618855107, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0031240949944244803}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0352935136087583, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002067265021549025}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.004626594678435454, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: 
{{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0005432919775929613}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.008754579533738186, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0008926051691577739}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.005563844272859168, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005736273252218394}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.026771846295171677, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001757008435057672}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.04820099757783116, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002741549258464254}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.030796858749521736, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, 
"subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017427639473783613}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.024799039714845435, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0017459449627102337}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.043284204930067245, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025292545975519414}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.02795059198035774, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016496415851520105}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.31677978361218245, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11491101515831749}], "config": {"model": "hf-causal", "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_5.json b/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..0cb2ac25488665e3483486e3a3abcdf82cbeffe6 --- /dev/null +++ b/146m14b1b5/evaluation/generation/agg.146m14b1b5_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.0024889331628189042, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0006815397373640728}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.002181607723763231, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0006183413432850734}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.0022773127753522747, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00063421295615357}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0005040504428810351, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00018325075012185258}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.0003980023860711985, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00015801551672316093}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.00043251733980451243, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0001624532835097625}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.002154819197651723, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0005785407429483231}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.00188617310535414, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0005267417519795776}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.001966631793324211, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0005373577051134612}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.002154819197651723, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.000586342779903712}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.0019061538009282621, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0005449980000845078}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0019792726415445738, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005523755455290908}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 
5.0129268480742525e-39, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 2.561058897904247e-33}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_0.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..557baedf9550cc6c75a54b7f4b8761f6227656ef --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0905a0d9b338196b13d9b4f38177a52c0faa846812f78ea1f05e431d23016e37 +size 4160369 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_1.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b48e71de9431ce7b140482688a63f083e80427d7 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed3e8fc61c08d0082e53e56d07ee5e189817b5a1064cb4fd953689da546b6925 +size 5106368 diff --git 
a/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_2.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..41a46efaf2ed07d17b748dbf06dfae446b655400 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10aa0e76f0dc599c67ed958c68bf5db6ce92364cb9455a5c29d1d8d256a548ca +size 6013086 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_3.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e9e4a6f9947e8a479e2dd0f7a1d869c795120bdb --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff68269e6512d6d9e5f5c6a948c2b9c7a71ebcf278d8f37a0db36c620e8b4c4 +size 6928015 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_4.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2afd53102b030ebf48b53c7c6c18451ff5e96487 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88557dfea657baea34058c8fe683080946d87a7252500b7a585539b44eeafe6f +size 7832436 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_5.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6b18b4557941c7a809b71a26c9dadb4eda65d7db --- /dev/null +++ 
b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-web_nlg_en_PALM_prompt_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa627a64f113ed896a61e5c34692569f17e5fa03def0875faa0bcfda88670ad3 +size 8739772 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_0.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..779d4ef9b7b14813a4602f89b7e61ff1b5dc9259 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:854f3dece02dd8138c3d74abbfd88b08cceeab2f91990aa9e4cd53bf3260f51b +size 7641705 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_1.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..60f2797db838ecaccdc4936cb0650f75f324b491 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd88a4d6bdb1ccea326559ecee18253e754d85e0ae272ecff7c54e530136226e +size 13334422 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_2.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..905635b999709dccd3c3bc2fe67240ce2093e138 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02e98ca1b23ee2af0c8de8fbab2d026b1bbefe54fe4b18563e290f0e139f153d +size 18925188 diff --git 
a/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_3.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7df7fb562a9fc5f9212e1e2d009c155a1e186d33 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d919a5e2ec60f854006d96bcfaa6d5e9e4242e4bfc0196acd43d350dcb5fd346 +size 24323079 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_4.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..930ed169d339da777a65ab29f53a7fb086c076a0 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4637a29f93d1ba14c712cf2ea7a1b92215261cee6190848179347e53bd76d1e8 +size 29466467 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_5.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..010599a070ace2127a6fdcf071889e062a05d219 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_GEM-wiki_lingua_en_tldr_en_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2807f5dc1cd284d13ae7d7893682dd2c1cc58e5fcb198675da341b1f23f4926a +size 34798959 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl new file mode 100644 index 
0000000000000000000000000000000000000000..61859b25647b0efc42dadbd2f2e3396e5e1676f6 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1f38ca5d0b6b937994f4c014aae39ba22a8cabbf7d93ce61d6bc967b2c60dd7 +size 3619850 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d02b7617ef3a845d3e156843c16c5ce749f7e730 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2cb983612c194ca578662922c9e2eecacf680630449a2c34e9f11933529bfb +size 5007766 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8aef5c04335e4aa4c882615b79e8f298e680183a --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d167a1d6414b65b0bd2708aa27a3c77248982b7c3aae5d49c1276653386ca57e +size 6078397 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..88cecac7cba09ba4e48597f13fd91e2ad4461b58 --- /dev/null +++ 
b/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac311719b751ccac19ca282e561676386cee8cbc5b4ed086de1ef0b0c59af5ca +size 7150126 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..70ddb231c2618ea2b71d64346311439734325b1b --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adc96ca9a973da403e5b99b59a2b107e4726accc7ba88d3714cf4295b51f0576 +size 8227912 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6baf3fa1431ad2064bdc61507d453bfacc90929d --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee7561c2b4a0735f99f2b3791f30fd5deba074852d0b02586f092107dc662152 +size 9317846 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_0.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..065880c6751c061bb61c8baaeaf53a1a01033dd2 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:98aca13d5a8ce7f37c341795dddcf17c7e6e45c7e7637223183ffae4ca332e66 +size 2837189 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_1.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..be0376d55215a9a9f30a8879487b462fa1d6b7df --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:388df82e17e0760ed49da8042a2ad4b1d7e2d7e921b887831292f57317f8cd99 +size 5105729 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_2.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a756e3c28af871e553ab0ab5f2f3ccf6b568fb6f --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:001091f48b9b9fab092757644bb6fe0e386bd33913d9f57279939bf7cb076e54 +size 7380419 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_3.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2ef41d6644d86ec7cee0f842661d75d5510608fb --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:397dce8ef901cfe33ea360fc00286916422e10cb380d3d303d04173924f8f2f2 +size 9649665 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_4.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_4.jsonl new file mode 100644 
index 0000000000000000000000000000000000000000..cfa3c2342c6c9b8211245dcd60c2cea904947cae --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf7f11cd61964c00b0bd0d262fccdb16206c44ddf148ac29c217a318fd067b1 +size 11673743 diff --git a/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_5.jsonl b/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8aa752c05648557a4028db88bc2a7616a11c5f17 --- /dev/null +++ b/146m14b1b5/evaluation/generation/examples.146m14b1b5_gem_xsum_article_DOC_summary_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43cd6c1b2d5eeec4bc040963a5fd812c364381e898404eae1b20bce80f9385d0 +size 13897487 diff --git a/146m14b1b5/evaluation/generation/merged.csv b/146m14b1b5/evaluation/generation/merged.csv new file mode 100644 index 0000000000000000000000000000000000000000..74b468624f2e21111ad994d2f87825ec03acff89 --- /dev/null +++ b/146m14b1b5/evaluation/generation/merged.csv @@ -0,0 +1,53 @@ +dataset,fewshots,prompt,metric,value +e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.0 +e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.0 +e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.0790458842297352 +e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.0790458842297352 +e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.09494958872823868 +e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.09494958872823868 +e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.10541398390697426 +e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.10541398390697426 +e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.11600779243298241 +e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.11600779243298241 
+e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.1273300920831121 +e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.1273300920831121 +e2e_nlg_cleaned,5,average,multiple,0.08712455689684044 +gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.023697282994221247 +gem_xsum,0,median,rouge2_fmeasure,0.023697282994221247 +gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.018497303697898898 +gem_xsum,1,median,rouge2_fmeasure,0.018497303697898898 +gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.019662620850184154 +gem_xsum,2,median,rouge2_fmeasure,0.019662620850184154 +gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.018803041332499517 +gem_xsum,3,median,rouge2_fmeasure,0.018803041332499517 +gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.005563844272859168 +gem_xsum,4,median,rouge2_fmeasure,0.005563844272859168 +gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.00043251733980451243 +gem_xsum,5,median,rouge2_fmeasure,0.00043251733980451243 +gem_xsum,5,average,multiple,0.014442768414577915 +web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.03996425304915668 +web_nlg_en,0,median,rouge2_fmeasure,0.03996425304915668 +web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.03533421218479914 +web_nlg_en,1,median,rouge2_fmeasure,0.03533421218479914 +web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.034017897932799156 +web_nlg_en,2,median,rouge2_fmeasure,0.034017897932799156 +web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.033414173983495055 +web_nlg_en,3,median,rouge2_fmeasure,0.033414173983495055 +web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.03361031713979665 +web_nlg_en,4,median,rouge2_fmeasure,0.03361031713979665 +web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.0336482487712469 +web_nlg_en,5,median,rouge2_fmeasure,0.0336482487712469 +web_nlg_en,5,average,multiple,0.03499818384354893 +wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.013569083806173431 +wiki_lingua_en,0,median,rouge2_fmeasure,0.013569083806173431 +wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.014839503603430174 
+wiki_lingua_en,1,median,rouge2_fmeasure,0.014839503603430174 +wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.014779019287405309 +wiki_lingua_en,2,median,rouge2_fmeasure,0.014779019287405309 +wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.015116060561354339 +wiki_lingua_en,3,median,rouge2_fmeasure,0.015116060561354339 +wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.006564722152989603 +wiki_lingua_en,4,median,rouge2_fmeasure,0.006564722152989603 +wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.001256549559807457 +wiki_lingua_en,5,median,rouge2_fmeasure,0.001256549559807457 +wiki_lingua_en,5,average,multiple,0.011020823161860052 diff --git a/146m14b1b5/evaluation/generation/merged.json b/146m14b1b5/evaluation/generation/merged.json new file mode 100644 index 0000000000000000000000000000000000000000..ad4e020b836d6072b44ec6c0416359724c68aa62 --- /dev/null +++ b/146m14b1b5/evaluation/generation/merged.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.23336285971913792, "bleu_stderr": 0.026831547216034552, "rouge1_fmeasure": 0.08446571459891085, "rouge1_fmeasure_stderr": 0.0015824965912950772, "rouge1_precision": 0.05439865962968967, "rouge1_precision_stderr": 0.0011092434568872975, "rouge1_recall": 0.24374100837448018, "rouge1_recall_stderr": 0.004154005834452381, "rouge2_fmeasure": 0.03996425304915668, "rouge2_fmeasure_stderr": 0.000989713007421649, "rouge2_precision": 0.02550841626771464, "rouge2_precision_stderr": 0.0006691581780940698, "rouge2_recall": 0.12014632144828626, "rouge2_recall_stderr": 0.002861946551453886, "rougeL_fmeasure": 0.08265416866191247, "rougeL_fmeasure_stderr": 0.0015305727288322003, "rougeL_precision": 0.0531129917083209, "rougeL_precision_stderr": 0.0010634389473944373, "rougeL_recall": 0.23984830582926353, "rougeL_recall_stderr": 0.004111595583702724, "rougeLsum_fmeasure": 0.08134464486582038, "rougeLsum_fmeasure_stderr": 0.0015093395667322115, "rougeLsum_precision": 0.052377103781293156, "rougeLsum_precision_stderr": 
0.0010589109148837366, "rougeLsum_recall": 0.23522556317101853, "rougeLsum_recall_stderr": 0.003942690776656362}}, "1": {"PALM_prompt": {"bleu": 0.149999320100551, "bleu_stderr": 0.01648600857868501, "rouge1_fmeasure": 0.0783024499591439, "rouge1_fmeasure_stderr": 0.0015115599781434076, "rouge1_precision": 0.05039389233407519, "rouge1_precision_stderr": 0.0010678479734785133, "rouge1_recall": 0.23911605188763663, "rouge1_recall_stderr": 0.004217097544169996, "rouge2_fmeasure": 0.03533421218479914, "rouge2_fmeasure_stderr": 0.0009441723878638388, "rouge2_precision": 0.022689565244003423, "rouge2_precision_stderr": 0.0006492234607511071, "rouge2_recall": 0.10804796123371856, "rouge2_recall_stderr": 0.002772425440915182, "rougeL_fmeasure": 0.0765840417258607, "rougeL_fmeasure_stderr": 0.0014686631424978348, "rougeL_precision": 0.049215318671431786, "rougeL_precision_stderr": 0.0010308225696802156, "rougeL_recall": 0.23439123752368696, "rougeL_recall_stderr": 0.004133972873584638, "rougeLsum_fmeasure": 0.07550976829415487, "rougeLsum_fmeasure_stderr": 0.0014525784885102127, "rougeLsum_precision": 0.048623672915347575, "rougeLsum_precision_stderr": 0.0010273102967872621, "rougeLsum_recall": 0.22915163434854377, "rougeLsum_recall_stderr": 0.00394130010753112}}, "2": {"PALM_prompt": {"bleu": 0.13305791271479062, "bleu_stderr": 0.011210726963048447, "rouge1_fmeasure": 0.07604718629625555, "rouge1_fmeasure_stderr": 0.0014484478235269312, "rouge1_precision": 0.04874142647590828, "rouge1_precision_stderr": 0.001024411277927112, "rouge1_recall": 0.23418663847997345, "rouge1_recall_stderr": 0.0038675109072007405, "rouge2_fmeasure": 0.034017897932799156, "rouge2_fmeasure_stderr": 0.0009135473678299833, "rouge2_precision": 0.02178125656158737, "rouge2_precision_stderr": 0.0006229612925274444, "rouge2_recall": 0.10462775115881133, "rouge2_recall_stderr": 0.0026242942688188046, "rougeL_fmeasure": 0.07445958002040497, "rougeL_fmeasure_stderr": 0.00141287468923303, 
"rougeL_precision": 0.047663521069835214, "rougeL_precision_stderr": 0.0009922323810451167, "rougeL_recall": 0.22896434652678516, "rougeL_recall_stderr": 0.003766547321885562, "rougeLsum_fmeasure": 0.0737768400907496, "rougeLsum_fmeasure_stderr": 0.0014096194296899693, "rougeLsum_precision": 0.04730394599720064, "rougeLsum_precision_stderr": 0.000995041687823415, "rougeLsum_recall": 0.22603927988076686, "rougeLsum_recall_stderr": 0.0036652559910908635}}, "3": {"PALM_prompt": {"bleu": 0.10368475460486865, "bleu_stderr": 0.011060778041099317, "rouge1_fmeasure": 0.07485950515286233, "rouge1_fmeasure_stderr": 0.0014144928012380664, "rouge1_precision": 0.048322862828509484, "rouge1_precision_stderr": 0.001091294602922774, "rouge1_recall": 0.2281459683846524, "rouge1_recall_stderr": 0.003630469769192213, "rouge2_fmeasure": 0.033414173983495055, "rouge2_fmeasure_stderr": 0.0008989911483642444, "rouge2_precision": 0.021765575511902755, "rouge2_precision_stderr": 0.0007376999524486791, "rouge2_recall": 0.10112935517460186, "rouge2_recall_stderr": 0.0025273038118034915, "rougeL_fmeasure": 0.0733042353653659, "rougeL_fmeasure_stderr": 0.0013877940713480257, "rougeL_precision": 0.047310277561040476, "rougeL_precision_stderr": 0.0010701645225700237, "rougeL_recall": 0.22271748235416333, "rougeL_recall_stderr": 0.0035317693762082583, "rougeLsum_fmeasure": 0.07279776055251803, "rougeLsum_fmeasure_stderr": 0.0013786706937308762, "rougeLsum_precision": 0.04700153630836809, "rougeLsum_precision_stderr": 0.0010655951813045416, "rougeLsum_recall": 0.22158687380299466, "rougeLsum_recall_stderr": 0.0035274636245185057}}, "4": {"PALM_prompt": {"bleu": 0.12643210930285223, "bleu_stderr": 0.017689451259510113, "rouge1_fmeasure": 0.07550620564282749, "rouge1_fmeasure_stderr": 0.0014275424158662932, "rouge1_precision": 0.048546496122212304, "rouge1_precision_stderr": 0.0010917543766011267, "rouge1_recall": 0.23329777643622032, "rouge1_recall_stderr": 0.00377927308236942, "rouge2_fmeasure": 
0.03361031713979665, "rouge2_fmeasure_stderr": 0.0009094774573230576, "rouge2_precision": 0.021812470526869524, "rouge2_precision_stderr": 0.0007410163805045418, "rouge2_recall": 0.10384870030998751, "rouge2_recall_stderr": 0.002559034716018191, "rougeL_fmeasure": 0.0737633812147375, "rougeL_fmeasure_stderr": 0.001396840481575515, "rougeL_precision": 0.047406756420334914, "rougeL_precision_stderr": 0.0010669755185219207, "rougeL_recall": 0.22692062870092394, "rougeL_recall_stderr": 0.003637107260174285, "rougeLsum_fmeasure": 0.07343239839440881, "rougeLsum_fmeasure_stderr": 0.001390175033514176, "rougeLsum_precision": 0.04722034321153461, "rougeLsum_precision_stderr": 0.0010651409333852174, "rougeLsum_recall": 0.22637726105546893, "rougeLsum_recall_stderr": 0.0036450661129995517}}, "5": {"PALM_prompt": {"bleu": 0.14580784113904285, "bleu_stderr": 0.02169837835551737, "rouge1_fmeasure": 0.07635558897011635, "rouge1_fmeasure_stderr": 0.0014067473320664224, "rouge1_precision": 0.04858688695146614, "rouge1_precision_stderr": 0.0009919570128492023, "rouge1_recall": 0.23936897854801265, "rouge1_recall_stderr": 0.003809570838617069, "rouge2_fmeasure": 0.0336482487712469, "rouge2_fmeasure_stderr": 0.0008809384464421846, "rouge2_precision": 0.021403300583302882, "rouge2_precision_stderr": 0.0005992879742971646, "rouge2_recall": 0.10635401405034506, "rouge2_recall_stderr": 0.0026320301985926294, "rougeL_fmeasure": 0.07457995007202929, "rougeL_fmeasure_stderr": 0.0013717292986929117, "rougeL_precision": 0.04743074568047055, "rougeL_precision_stderr": 0.0009628261081974606, "rougeL_recall": 0.23310511154456653, "rougeL_recall_stderr": 0.003667912499413062, "rougeLsum_fmeasure": 0.07413039496102994, "rougeLsum_fmeasure_stderr": 0.0013645157195961873, "rougeLsum_precision": 0.04717939159930382, "rougeLsum_precision_stderr": 0.0009611612661616537, "rougeLsum_recall": 0.23214615779014944, "rougeLsum_recall_stderr": 0.0036745231846563382}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": 
{"bleu": 0.5430730295333313, "bleu_stderr": 0.0365815466590515, "rouge1_fmeasure": 0.10422675836754251, "rouge1_fmeasure_stderr": 0.001600341131106392, "rouge1_precision": 0.09363473178880811, "rouge1_precision_stderr": 0.0016796809875309112, "rouge1_recall": 0.14418256512962083, "rouge1_recall_stderr": 0.002167398392749155, "rouge2_fmeasure": 0.013569083806173431, "rouge2_fmeasure_stderr": 0.000536933125563704, "rouge2_precision": 0.012160392139666808, "rouge2_precision_stderr": 0.0005105463730603428, "rouge2_recall": 0.019121459317877912, "rouge2_recall_stderr": 0.0008232714019808441, "rougeL_fmeasure": 0.09199729526410924, "rougeL_fmeasure_stderr": 0.0013200459901581483, "rougeL_precision": 0.0820107922443091, "rougeL_precision_stderr": 0.0013749923256347872, "rougeL_recall": 0.12893888139963622, "rougeL_recall_stderr": 0.0018862133214712646, "rougeLsum_fmeasure": 0.09659833472928117, "rougeLsum_fmeasure_stderr": 0.0014598821948214504, "rougeLsum_precision": 0.08652832387527064, "rougeLsum_precision_stderr": 0.0015294411673741832, "rougeLsum_recall": 0.1344951570087262, "rougeLsum_recall_stderr": 0.0020253753831603227}}, "1": {"tldr_en": {"bleu": 0.7631030821901151, "bleu_stderr": 0.03637126016426104, "rouge1_fmeasure": 0.12944623984650427, "rouge1_fmeasure_stderr": 0.0015508995745003894, "rouge1_precision": 0.113864402018691, "rouge1_precision_stderr": 0.0015404538134881224, "rouge1_recall": 0.1838117919523142, "rouge1_recall_stderr": 0.0023514998715679107, "rouge2_fmeasure": 0.014839503603430174, "rouge2_fmeasure_stderr": 0.0005401931668179484, "rouge2_precision": 0.012715360202877583, "rouge2_precision_stderr": 0.0004729696535287597, "rouge2_recall": 0.022779782580969615, "rouge2_recall_stderr": 0.0010072688122998766, "rougeL_fmeasure": 0.09507660161625574, "rougeL_fmeasure_stderr": 0.0010200141214599331, "rougeL_precision": 0.08328579570968432, "rougeL_precision_stderr": 0.0010324983906124013, "rougeL_recall": 0.1380185552697111, "rougeL_recall_stderr": 
0.001751171256108266, "rougeLsum_fmeasure": 0.12204763022142331, "rougeLsum_fmeasure_stderr": 0.0014376576746715666, "rougeLsum_precision": 0.10740807503883001, "rougeLsum_precision_stderr": 0.0014382706335826095, "rougeLsum_recall": 0.17373508082119807, "rougeLsum_recall_stderr": 0.0022092321023785564}}, "2": {"tldr_en": {"bleu": 0.745302789030922, "bleu_stderr": 0.041165212986491714, "rouge1_fmeasure": 0.125968585192418, "rouge1_fmeasure_stderr": 0.001567482533109581, "rouge1_precision": 0.11393029559700361, "rouge1_precision_stderr": 0.0016763730882923063, "rouge1_recall": 0.1769694736700524, "rouge1_recall_stderr": 0.0023424150386553723, "rouge2_fmeasure": 0.014779019287405309, "rouge2_fmeasure_stderr": 0.0005575381148601111, "rouge2_precision": 0.01301367400947571, "rouge2_precision_stderr": 0.0005143488258905196, "rouge2_recall": 0.021873142006066014, "rouge2_recall_stderr": 0.000934130847859976, "rougeL_fmeasure": 0.09713467955259053, "rougeL_fmeasure_stderr": 0.0011252807281427579, "rougeL_precision": 0.0874972075486996, "rougeL_precision_stderr": 0.001231137532979079, "rougeL_recall": 0.13892758321292495, "rougeL_recall_stderr": 0.001814780335285223, "rougeLsum_fmeasure": 0.11827617802959285, "rougeLsum_fmeasure_stderr": 0.0014395913434439003, "rougeLsum_precision": 0.10695613459471714, "rougeLsum_precision_stderr": 0.0015471539643745647, "rougeLsum_recall": 0.16645890967746207, "rougeLsum_recall_stderr": 0.00217354070441736}}, "3": {"tldr_en": {"bleu": 0.9375524807033079, "bleu_stderr": 0.05076091375311075, "rouge1_fmeasure": 0.1080907737092221, "rouge1_fmeasure_stderr": 0.0017189902209006306, "rouge1_precision": 0.10517912682002659, "rouge1_precision_stderr": 0.0020169944192517456, "rouge1_recall": 0.14986203056954372, "rouge1_recall_stderr": 0.002553781810198287, "rouge2_fmeasure": 0.015116060561354339, "rouge2_fmeasure_stderr": 0.0005706720135531178, "rouge2_precision": 0.01411384967174985, "rouge2_precision_stderr": 0.0005796594561779423, 
"rouge2_recall": 0.022758512637964766, "rouge2_recall_stderr": 0.0010268466428699436, "rougeL_fmeasure": 0.08717745912804374, "rougeL_fmeasure_stderr": 0.0013335661136481402, "rougeL_precision": 0.08500267190413809, "rougeL_precision_stderr": 0.0016364237998568027, "rougeL_recall": 0.12241576028305143, "rougeL_recall_stderr": 0.0020787177316311327, "rougeLsum_fmeasure": 0.10075080145793731, "rougeLsum_fmeasure_stderr": 0.0015788401832700393, "rougeLsum_precision": 0.09826602680946438, "rougeLsum_precision_stderr": 0.0018880997129709805, "rougeLsum_recall": 0.13990265269905022, "rougeLsum_recall_stderr": 0.0023636459311459136}}, "4": {"tldr_en": {"bleu": 0.2455685388768164, "bleu_stderr": 0.023312196201743386, "rouge1_fmeasure": 0.038297790163100025, "rouge1_fmeasure_stderr": 0.001450936870070239, "rouge1_precision": 0.04021891620427611, "rouge1_precision_stderr": 0.00179527122100358, "rouge1_recall": 0.05458669351809771, "rouge1_recall_stderr": 0.0021686949760662015, "rouge2_fmeasure": 0.006564722152989603, "rouge2_fmeasure_stderr": 0.00044826882706775823, "rouge2_precision": 0.007175083221520411, "rouge2_precision_stderr": 0.0006861959426040823, "rouge2_recall": 0.010377039775321539, "rouge2_recall_stderr": 0.0008398548385378956, "rougeL_fmeasure": 0.03193814381233648, "rougeL_fmeasure_stderr": 0.0011842243614109398, "rougeL_precision": 0.03365949856690121, "rougeL_precision_stderr": 0.001518451427611522, "rougeL_recall": 0.046270525077678676, "rougeL_recall_stderr": 0.0018467375730547731, "rougeLsum_fmeasure": 0.03543698761206124, "rougeLsum_fmeasure_stderr": 0.0013366330300040165, "rougeLsum_precision": 0.03743305339041787, "rougeLsum_precision_stderr": 0.0016843420360592007, "rougeLsum_recall": 0.0506510365437872, "rougeLsum_recall_stderr": 0.002014009342396866}}, "5": {"tldr_en": {"bleu": 5.018187832788465e-07, "bleu_stderr": 9.001238569701927e-07, "rouge1_fmeasure": 0.006452133253476872, "rouge1_fmeasure_stderr": 0.0006426391950933609, "rouge1_precision": 
0.00666688354748637, "rouge1_precision_stderr": 0.0007557736630150564, "rouge1_recall": 0.009539134461787335, "rouge1_recall_stderr": 0.0010266547706396002, "rouge2_fmeasure": 0.001256549559807457, "rouge2_fmeasure_stderr": 0.00021119936913661453, "rouge2_precision": 0.001389249823684832, "rouge2_precision_stderr": 0.0003303542509473268, "rouge2_recall": 0.002107899216680717, "rouge2_recall_stderr": 0.000406297855701019, "rougeL_fmeasure": 0.005351282375182499, "rougeL_fmeasure_stderr": 0.0005270601162356447, "rougeL_precision": 0.0055221266380235945, "rougeL_precision_stderr": 0.0006252483839771813, "rougeL_recall": 0.00798094708260653, "rougeL_recall_stderr": 0.0008641648642621242, "rougeLsum_fmeasure": 0.0059470818044703435, "rougeLsum_fmeasure_stderr": 0.0005945085971730911, "rougeLsum_precision": 0.006186563146749757, "rougeLsum_precision_stderr": 0.0007119271694803169, "rougeLsum_recall": 0.008778707261151484, "rougeLsum_recall_stderr": 0.0009486499053541579}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.0, "bleu_stderr": 0.0, "rouge1_fmeasure": 0.0, "rouge1_fmeasure_stderr": 0.0, "rouge1_precision": 0.0, "rouge1_precision_stderr": 0.0, "rouge1_recall": 0.0, "rouge1_recall_stderr": 0.0, "rouge2_fmeasure": 0.0, "rouge2_fmeasure_stderr": 0.0, "rouge2_precision": 0.0, "rouge2_precision_stderr": 0.0, "rouge2_recall": 0.0, "rouge2_recall_stderr": 0.0, "rougeL_fmeasure": 0.0, "rougeL_fmeasure_stderr": 0.0, "rougeL_precision": 0.0, "rougeL_precision_stderr": 0.0, "rougeL_recall": 0.0, "rougeL_recall_stderr": 0.0, "rougeLsum_fmeasure": 0.0, "rougeLsum_fmeasure_stderr": 0.0, "rougeLsum_precision": 0.0, "rougeLsum_precision_stderr": 0.0, "rougeLsum_recall": 0.0, "rougeLsum_recall_stderr": 0.0}}, "1": {"generate_text_restaurant": {"bleu": 4.009859551325065, "bleu_stderr": 0.13278046275335678, "rouge1_fmeasure": 0.2740595010030775, "rouge1_fmeasure_stderr": 0.002385396193975501, "rouge1_precision": 0.3578229387593085, "rouge1_precision_stderr": 
0.003856721380827842, "rouge1_recall": 0.25948739086059736, "rouge1_recall_stderr": 0.002640226000794725, "rouge2_fmeasure": 0.0790458842297352, "rouge2_fmeasure_stderr": 0.001445148755996047, "rouge2_precision": 0.13054044528410688, "rouge2_precision_stderr": 0.0037429245675078405, "rouge2_recall": 0.07406180042426694, "rouge2_recall_stderr": 0.0014501941041824516, "rougeL_fmeasure": 0.2018458460713563, "rougeL_fmeasure_stderr": 0.0018101367404328157, "rougeL_precision": 0.2735828328272949, "rougeL_precision_stderr": 0.0036193449495243797, "rougeL_recall": 0.1900509586663965, "rougeL_recall_stderr": 0.001986984907305974, "rougeLsum_fmeasure": 0.22766636955410458, "rougeLsum_fmeasure_stderr": 0.002095153498100042, "rougeLsum_precision": 0.30371799193520876, "rougeLsum_precision_stderr": 0.003752209403616017, "rougeLsum_recall": 0.21475934437687166, "rougeLsum_recall_stderr": 0.0022815651046262236}}, "2": {"generate_text_restaurant": {"bleu": 4.7806427294993, "bleu_stderr": 0.1310832353636796, "rouge1_fmeasure": 0.2720615139154822, "rouge1_fmeasure_stderr": 0.0027687783144970625, "rouge1_precision": 0.4023615733786828, "rouge1_precision_stderr": 0.0050997654220666, "rouge1_recall": 0.2604859328757677, "rouge1_recall_stderr": 0.003124610081613822, "rouge2_fmeasure": 0.09494958872823868, "rouge2_fmeasure_stderr": 0.0016238555093460345, "rouge2_precision": 0.20471652877426683, "rouge2_precision_stderr": 0.0053699474888414716, "rouge2_recall": 0.08914356908095893, "rouge2_recall_stderr": 0.0017253335935271583, "rougeL_fmeasure": 0.20383717565040163, "rougeL_fmeasure_stderr": 0.002104411024247141, "rougeL_precision": 0.324312305309799, "rougeL_precision_stderr": 0.005059911825690109, "rougeL_recall": 0.19325765142384435, "rougeL_recall_stderr": 0.002374927639383697, "rougeLsum_fmeasure": 0.2287941438454128, "rougeLsum_fmeasure_stderr": 0.0024025488670924725, "rougeLsum_precision": 0.35265257546958945, "rougeLsum_precision_stderr": 0.005084624220690631, 
"rougeLsum_recall": 0.21792408382120357, "rougeLsum_recall_stderr": 0.002710638175002805}}, "3": {"generate_text_restaurant": {"bleu": 5.379158787794686, "bleu_stderr": 0.17424945572147568, "rouge1_fmeasure": 0.2739605028454004, "rouge1_fmeasure_stderr": 0.0028626358837710963, "rouge1_precision": 0.4200250914129759, "rouge1_precision_stderr": 0.0053484767449253675, "rouge1_recall": 0.2597005038015286, "rouge1_recall_stderr": 0.0031776678746421685, "rouge2_fmeasure": 0.10541398390697426, "rouge2_fmeasure_stderr": 0.0017164566544297254, "rouge2_precision": 0.2324625707379104, "rouge2_precision_stderr": 0.005656196308354222, "rouge2_recall": 0.09787654675007354, "rouge2_recall_stderr": 0.00178618058095944, "rougeL_fmeasure": 0.20932493009842154, "rougeL_fmeasure_stderr": 0.002213566067169264, "rougeL_precision": 0.3452442707098063, "rougeL_precision_stderr": 0.005326530824347986, "rougeL_recall": 0.19634967424253236, "rougeL_recall_stderr": 0.002444802090072814, "rougeLsum_fmeasure": 0.23343576076206798, "rougeLsum_fmeasure_stderr": 0.0025054329416814864, "rougeLsum_precision": 0.3730608496140118, "rougeLsum_precision_stderr": 0.0053445330283940886, "rougeLsum_recall": 0.21983351464384387, "rougeLsum_recall_stderr": 0.0027657542997588506}}, "4": {"generate_text_restaurant": {"bleu": 5.84362787810031, "bleu_stderr": 0.1914395641675779, "rouge1_fmeasure": 0.28912203831106437, "rouge1_fmeasure_stderr": 0.0028684342474448, "rouge1_precision": 0.44142915899786084, "rouge1_precision_stderr": 0.005252644410011952, "rouge1_recall": 0.27293406189445324, "rouge1_recall_stderr": 0.003231606561873733, "rouge2_fmeasure": 0.11600779243298241, "rouge2_fmeasure_stderr": 0.0017448325124731262, "rouge2_precision": 0.24580187764860698, "rouge2_precision_stderr": 0.005582820891133567, "rouge2_recall": 0.10804855284127729, "rouge2_recall_stderr": 0.0018386317913512528, "rougeL_fmeasure": 0.22136695075479842, "rougeL_fmeasure_stderr": 0.002213265282685222, "rougeL_precision": 
0.36278920910440526, "rougeL_precision_stderr": 0.005259330590831443, "rougeL_recall": 0.20655546999122476, "rougeL_recall_stderr": 0.0024658407342331543, "rougeLsum_fmeasure": 0.24635463422951365, "rougeLsum_fmeasure_stderr": 0.002518259186545489, "rougeLsum_precision": 0.39113354412345547, "rougeLsum_precision_stderr": 0.005259888245133616, "rougeLsum_recall": 0.23109513502236428, "rougeLsum_recall_stderr": 0.0028128632311166086}}, "5": {"generate_text_restaurant": {"bleu": 6.294657081635081, "bleu_stderr": 0.1191423611800128, "rouge1_fmeasure": 0.30818385424543937, "rouge1_fmeasure_stderr": 0.002812315846665837, "rouge1_precision": 0.45758123798499606, "rouge1_precision_stderr": 0.005027609714045855, "rouge1_recall": 0.2870117768804813, "rouge1_recall_stderr": 0.0031304392663814677, "rouge2_fmeasure": 0.1273300920831121, "rouge2_fmeasure_stderr": 0.0017849338112142722, "rouge2_precision": 0.2503889688775632, "rouge2_precision_stderr": 0.00532362565564733, "rouge2_recall": 0.11655912503614715, "rouge2_recall_stderr": 0.0018305152372365434, "rougeL_fmeasure": 0.23900514298098396, "rougeL_fmeasure_stderr": 0.0022097628891594285, "rougeL_precision": 0.37623432283720026, "rougeL_precision_stderr": 0.005027601070585646, "rougeL_recall": 0.22058281726854637, "rougeL_recall_stderr": 0.0024416343969262534, "rougeLsum_fmeasure": 0.2631784707358199, "rougeLsum_fmeasure_stderr": 0.002487054103176312, "rougeLsum_precision": 0.40434981140634907, "rougeLsum_precision_stderr": 0.005043898533403243, "rougeLsum_recall": 0.24370773833649576, "rougeLsum_recall_stderr": 0.002739699465790519}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 0.7848159984641091, "bleu_stderr": 0.0846489540307912, "rouge1_fmeasure": 0.13822546470552896, "rouge1_fmeasure_stderr": 0.0020817271615335853, "rouge1_precision": 0.09869862910594125, "rouge1_precision_stderr": 0.0015754263415516496, "rouge1_recall": 0.2425997427212717, "rouge1_recall_stderr": 0.0035498618411088337, "rouge2_fmeasure": 
0.023697282994221247, "rouge2_fmeasure_stderr": 0.0009611791067872703, "rouge2_precision": 0.016787743878533688, "rouge2_precision_stderr": 0.0007005319097605839, "rouge2_recall": 0.0425780543002491, "rouge2_recall_stderr": 0.0017220443443167939, "rougeL_fmeasure": 0.12098743849059979, "rougeL_fmeasure_stderr": 0.001680519826816699, "rougeL_precision": 0.08626521539341878, "rougeL_precision_stderr": 0.0012705285495719658, "rougeL_recall": 0.21327598743091675, "rougeL_recall_stderr": 0.0029459890547047195, "rougeLsum_fmeasure": 0.1053525696001833, "rougeLsum_fmeasure_stderr": 0.001600085618320443, "rougeLsum_precision": 0.07499548048418175, "rougeLsum_precision_stderr": 0.0011973587779807702, "rougeLsum_recall": 0.18673351936586377, "rougeLsum_recall_stderr": 0.002877930418991892}}, "1": {"article_DOC_summary": {"bleu": 0.6629456628355466, "bleu_stderr": 0.08774497677853796, "rouge1_fmeasure": 0.12866565250951, "rouge1_fmeasure_stderr": 0.0019918374752407302, "rouge1_precision": 0.09124145037680485, "rouge1_precision_stderr": 0.0014659607841345699, "rouge1_recall": 0.22757570030500165, "rouge1_recall_stderr": 0.0034586902105503583, "rouge2_fmeasure": 0.018497303697898898, "rouge2_fmeasure_stderr": 0.0008688724913792904, "rouge2_precision": 0.012967504798466119, "rouge2_precision_stderr": 0.0006125196007516605, "rouge2_recall": 0.03365509386476815, "rouge2_recall_stderr": 0.0015985879929148613, "rougeL_fmeasure": 0.11258738658901131, "rougeL_fmeasure_stderr": 0.001654534085681258, "rougeL_precision": 0.07973413334544867, "rougeL_precision_stderr": 0.0012126850079835258, "rougeL_recall": 0.1999503525963445, "rougeL_recall_stderr": 0.0029346063767394658, "rougeLsum_fmeasure": 0.09952018094863402, "rougeLsum_fmeasure_stderr": 0.0015543045742524805, "rougeLsum_precision": 0.07040045782230749, "rougeLsum_precision_stderr": 0.0011336127776379584, "rougeLsum_recall": 0.17735154114603943, "rougeLsum_recall_stderr": 0.002793790877961365}}, "2": {"article_DOC_summary": 
{"bleu": 0.6581479118355196, "bleu_stderr": 0.07454636170059836, "rouge1_fmeasure": 0.1275054327542126, "rouge1_fmeasure_stderr": 0.001936094417525703, "rouge1_precision": 0.09013053440861082, "rouge1_precision_stderr": 0.0014220128061208542, "rouge1_recall": 0.2273610167464681, "rouge1_recall_stderr": 0.0034008282640079156, "rouge2_fmeasure": 0.019662620850184154, "rouge2_fmeasure_stderr": 0.0008838457269276937, "rouge2_precision": 0.013685249717276669, "rouge2_precision_stderr": 0.0006153358129211847, "rouge2_recall": 0.03651263650932684, "rouge2_recall_stderr": 0.0016893088610047168, "rougeL_fmeasure": 0.11364996341359528, "rougeL_fmeasure_stderr": 0.0016363233390032223, "rougeL_precision": 0.08023430784844289, "rougeL_precision_stderr": 0.0011971788003166785, "rougeL_recall": 0.20334042051960866, "rougeL_recall_stderr": 0.0029270688857344516, "rougeLsum_fmeasure": 0.09766604816138601, "rougeLsum_fmeasure_stderr": 0.001494448373327452, "rougeLsum_precision": 0.06885212236096724, "rougeLsum_precision_stderr": 0.0010822260185504432, "rougeLsum_recall": 0.17558641489646157, "rougeLsum_recall_stderr": 0.0027535532125502033}}, "3": {"article_DOC_summary": {"bleu": 0.6487530237798568, "bleu_stderr": 0.07015562797768161, "rouge1_fmeasure": 0.12223643758741752, "rouge1_fmeasure_stderr": 0.002043267213282596, "rouge1_precision": 0.08887731112301472, "rouge1_precision_stderr": 0.0016693801787118184, "rouge1_recall": 0.2137238957435145, "rouge1_recall_stderr": 0.0035351437827021116, "rouge2_fmeasure": 0.018803041332499517, "rouge2_fmeasure_stderr": 0.0009204376153200208, "rouge2_precision": 0.01336910380993121, "rouge2_precision_stderr": 0.0006644057446671918, "rouge2_recall": 0.034200003110033475, "rouge2_recall_stderr": 0.001715535153457774, "rougeL_fmeasure": 0.10976787111848983, "rougeL_fmeasure_stderr": 0.001747164342136605, "rougeL_precision": 0.07951277816840352, "rougeL_precision_stderr": 0.0013813570731518456, "rougeL_recall": 0.1929746396810042, 
"rougeL_recall_stderr": 0.0031195403408695475, "rougeLsum_fmeasure": 0.09547379870346774, "rougeLsum_fmeasure_stderr": 0.0015781898824613876, "rougeLsum_precision": 0.0692667028628289, "rougeLsum_precision_stderr": 0.0012719926110399762, "rougeLsum_recall": 0.16841312507750444, "rougeLsum_recall_stderr": 0.0028632604489444353}}, "4": {"article_DOC_summary": {"bleu": 0.31677978361218245, "bleu_stderr": 0.11491101515831749, "rouge1_fmeasure": 0.0352935136087583, "rouge1_fmeasure_stderr": 0.002067265021549025, "rouge1_precision": 0.030778909853395358, "rouge1_precision_stderr": 0.0020706771099415055, "rouge1_recall": 0.05446169618855107, "rouge1_recall_stderr": 0.0031240949944244803, "rouge2_fmeasure": 0.005563844272859168, "rouge2_fmeasure_stderr": 0.0005736273252218394, "rouge2_precision": 0.004626594678435454, "rouge2_precision_stderr": 0.0005432919775929613, "rouge2_recall": 0.008754579533738186, "rouge2_recall_stderr": 0.0008926051691577739, "rougeL_fmeasure": 0.030796858749521736, "rougeL_fmeasure_stderr": 0.0017427639473783613, "rougeL_precision": 0.026771846295171677, "rougeL_precision_stderr": 0.001757008435057672, "rougeL_recall": 0.04820099757783116, "rougeL_recall_stderr": 0.002741549258464254, "rougeLsum_fmeasure": 0.02795059198035774, "rougeLsum_fmeasure_stderr": 0.0016496415851520105, "rougeLsum_precision": 0.024799039714845435, "rougeLsum_precision_stderr": 0.0017459449627102337, "rougeLsum_recall": 0.043284204930067245, "rougeLsum_recall_stderr": 0.0025292545975519414}}, "5": {"article_DOC_summary": {"bleu": 5.0129268480742525e-39, "bleu_stderr": 2.561058897904247e-33, "rouge1_fmeasure": 0.0022773127753522747, "rouge1_fmeasure_stderr": 0.00063421295615357, "rouge1_precision": 0.0024889331628189042, "rouge1_precision_stderr": 0.0006815397373640728, "rouge1_recall": 0.002181607723763231, "rouge1_recall_stderr": 0.0006183413432850734, "rouge2_fmeasure": 0.00043251733980451243, "rouge2_fmeasure_stderr": 0.0001624532835097625, "rouge2_precision": 
0.0005040504428810351, "rouge2_precision_stderr": 0.00018325075012185258, "rouge2_recall": 0.0003980023860711985, "rouge2_recall_stderr": 0.00015801551672316093, "rougeL_fmeasure": 0.001966631793324211, "rougeL_fmeasure_stderr": 0.0005373577051134612, "rougeL_precision": 0.002154819197651723, "rougeL_precision_stderr": 0.0005785407429483231, "rougeL_recall": 0.00188617310535414, "rougeL_recall_stderr": 0.0005267417519795776, "rougeLsum_fmeasure": 0.0019792726415445738, "rougeLsum_fmeasure_stderr": 0.0005523755455290908, "rougeLsum_precision": 0.002154819197651723, "rougeLsum_precision_stderr": 0.000586342779903712, "rougeLsum_recall": 0.0019061538009282621, "rougeLsum_recall_stderr": 0.0005449980000845078}}}} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_0.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..916fc3b47cf80b51c14ec1d9ec7c18b2d32194fb --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.23336285971913792, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.026831547216034552 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05439865962968967, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0011092434568872975 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.24374100837448018, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004154005834452381 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08446571459891085, + 
"dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015824965912950772 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.02550841626771464, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006691581780940698 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.12014632144828626, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002861946551453886 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03996425304915668, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000989713007421649 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.0531129917083209, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010634389473944373 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.23984830582926353, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004111595583702724 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08265416866191247, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015305727288322003 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.052377103781293156, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010589109148837366 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.23522556317101853, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": 
null, + "rougeLsum_recall_stderr": 0.003942690776656362 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08134464486582038, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015093395667322115 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_1.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..eb5bf5669aabc8cc5c9ae63b47cf6625144c70e5 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.149999320100551, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.01648600857868501 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05039389233407519, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0010678479734785133 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.23911605188763663, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004217097544169996 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + 
"rouge1_fmeasure": 0.0783024499591439, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015115599781434076 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.022689565244003423, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006492234607511071 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10804796123371856, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002772425440915182 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03533421218479914, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009441723878638388 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.049215318671431786, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010308225696802156 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.23439123752368696, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.004133972873584638 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.0765840417258607, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014686631424978348 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.048623672915347575, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010273102967872621 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.22915163434854377, + "dataset_path": 
"GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.00394130010753112 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07550976829415487, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014525784885102127 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_2.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ceb0340064d370a8416ccd3baaf7a7f1b41a9a2b --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.13305791271479062, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.011210726963048447 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.04874142647590828, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001024411277927112 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.23418663847997345, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0038675109072007405 + }, + { + "task_name": 
"GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07604718629625555, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014484478235269312 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.02178125656158737, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006229612925274444 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10462775115881133, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0026242942688188046 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.034017897932799156, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009135473678299833 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.047663521069835214, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0009922323810451167 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.22896434652678516, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003766547321885562 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07445958002040497, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.00141287468923303 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04730394599720064, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.000995041687823415 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + 
"rougeLsum_recall": 0.22603927988076686, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036652559910908635 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.0737768400907496, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014096194296899693 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_3.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..b4fdbc8d996361d6b46f6c28f010b908f25378a1 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.10368475460486865, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.011060778041099317 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.048322862828509484, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001091294602922774 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.2281459683846524, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + 
"rouge1_recall_stderr": 0.003630469769192213 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07485950515286233, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014144928012380664 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.021765575511902755, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007376999524486791 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10112935517460186, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0025273038118034915 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.033414173983495055, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0008989911483642444 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.047310277561040476, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010701645225700237 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.22271748235416333, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0035317693762082583 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.0733042353653659, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0013877940713480257 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04700153630836809, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010655951813045416 + }, + { + 
"task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.22158687380299466, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0035274636245185057 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07279776055251803, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013786706937308762 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_4.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..731c7acc8c8f3c78212efe587f2b5fc8aaaa86b6 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.12643210930285223, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.017689451259510113 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.048546496122212304, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0010917543766011267 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.23329777643622032, + "dataset_path": 
"GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.00377927308236942 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07550620564282749, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014275424158662932 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.021812470526869524, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007410163805045418 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10384870030998751, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002559034716018191 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03361031713979665, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009094774573230576 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.047406756420334914, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010669755185219207 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.22692062870092394, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003637107260174285 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.0737633812147375, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001396840481575515 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04722034321153461, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + 
"rougeLsum_precision_stderr": 0.0010651409333852174 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.22637726105546893, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036450661129995517 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07343239839440881, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001390175033514176 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_5.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..0847e71344ddf6cca2699da5d7af036e86e4aa04 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.14580784113904285, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.02169837835551737 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.04858688695146614, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0009919570128492023 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", 
+ "rouge1_recall": 0.23936897854801265, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003809570838617069 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07635558897011635, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014067473320664224 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.021403300583302882, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0005992879742971646 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10635401405034506, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0026320301985926294 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.0336482487712469, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0008809384464421846 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.04743074568047055, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0009628261081974606 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.23310511154456653, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003667912499413062 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07457995007202929, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0013717292986929117 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04717939159930382, + "dataset_path": "GEM/web_nlg", + 
"dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0009611612661616537 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.23214615779014944, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036745231846563382 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07413039496102994, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013645157195961873 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_0.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..9ec449f151f721e101e58761bedd3db6feeb9c47 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.09363473178880811, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0016796809875309112 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.14418256512962083, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002167398392749155 + }, 
+ { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.10422675836754251, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001600341131106392 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.012160392139666808, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0005105463730603428 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.019121459317877912, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0008232714019808441 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.013569083806173431, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000536933125563704 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.0820107922443091, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0013749923256347872 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.12893888139963622, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0018862133214712646 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.09199729526410924, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0013200459901581483 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.08652832387527064, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0015294411673741832 + }, + { + "task_name": 
"GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.1344951570087262, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0020253753831603227 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.09659833472928117, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014598821948214504 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.5430730295333313, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.0365815466590515 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_1.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..5f3c375fef396618d76065b551bb3b3ad67d48f3 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.113864402018691, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015404538134881224 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.1838117919523142, + "dataset_path": 
"GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0023514998715679107 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.12944623984650427, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015508995745003894 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.012715360202877583, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0004729696535287597 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.022779782580969615, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0010072688122998766 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.014839503603430174, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005401931668179484 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.08328579570968432, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010324983906124013 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.1380185552697111, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001751171256108266 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.09507660161625574, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0010200141214599331 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.10740807503883001, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": 
"en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014382706335826095 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.17373508082119807, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0022092321023785564 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.12204763022142331, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014376576746715666 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.7631030821901151, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.03637126016426104 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_2.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..a58710e0fc6cb228b0eb94a29e81b230ae8ffceb --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.11393029559700361, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0016763730882923063 + }, + { + "task_name": 
"GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.1769694736700524, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0023424150386553723 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.125968585192418, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001567482533109581 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.01301367400947571, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0005143488258905196 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.021873142006066014, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.000934130847859976 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.014779019287405309, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005575381148601111 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.0874972075486996, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001231137532979079 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.13892758321292495, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001814780335285223 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.09713467955259053, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0011252807281427579 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + 
"rougeLsum_precision": 0.10695613459471714, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0015471539643745647 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.16645890967746207, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.00217354070441736 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.11827617802959285, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014395913434439003 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.745302789030922, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.041165212986491714 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_3.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..af0c1edd4841faaaafc56eb57d3e119f0ecdfba1 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.10517912682002659, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + 
"subset": null, + "rouge1_precision_stderr": 0.0020169944192517456 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.14986203056954372, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002553781810198287 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.1080907737092221, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0017189902209006306 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.01411384967174985, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0005796594561779423 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.022758512637964766, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0010268466428699436 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.015116060561354339, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005706720135531178 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.08500267190413809, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0016364237998568027 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.12241576028305143, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0020787177316311327 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08717745912804374, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 
0.0013335661136481402 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.09826602680946438, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0018880997129709805 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.13990265269905022, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0023636459311459136 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.10075080145793731, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015788401832700393 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.9375524807033079, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.05076091375311075 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_4.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8813affc9db19b8847fbc95248d1c610f09a2b --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + 
"rouge1_precision": 0.04021891620427611, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.00179527122100358 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.05458669351809771, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0021686949760662015 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.038297790163100025, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001450936870070239 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.007175083221520411, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006861959426040823 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.010377039775321539, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0008398548385378956 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.006564722152989603, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00044826882706775823 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.03365949856690121, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001518451427611522 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.046270525077678676, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0018467375730547731 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.03193814381233648, + 
"dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0011842243614109398 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.03743305339041787, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0016843420360592007 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.0506510365437872, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.002014009342396866 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.03543698761206124, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013366330300040165 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.2455685388768164, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.023312196201743386 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_5.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..ac2d71cbfd2cd93ebcbe5765196af17b84389bdb --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 
+1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.00666688354748637, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0007557736630150564 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.009539134461787335, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0010266547706396002 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.006452133253476872, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0006426391950933609 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.001389249823684832, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0003303542509473268 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.002107899216680717, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.000406297855701019 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.001256549559807457, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00021119936913661453 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.0055221266380235945, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0006252483839771813 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.00798094708260653, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0008641648642621242 + }, + { + 
"task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.005351282375182499, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0005270601162356447 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.006186563146749757, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0007119271694803169 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.008778707261151484, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0009486499053541579 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.0059470818044703435, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0005945085971730911 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 5.018187832788465e-07, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 9.001238569701927e-07 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 
0000000000000000000000000000000000000000..36733419b0e3aef3d1ce9c746e14892529952c60 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + 
"rougeL_precision_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.0, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..27230643a29c4bcadbceace227523af70bd6ef1c --- /dev/null 
+++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 4.009859551325065, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.13278046275335678 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.3578229387593085, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003856721380827842 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.25948739086059736, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002640226000794725 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.2740595010030775, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002385396193975501 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.13054044528410688, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0037429245675078405 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.07406180042426694, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0014501941041824516 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.0790458842297352, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.001445148755996047 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + 
"rougeL_precision": 0.2735828328272949, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0036193449495243797 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.1900509586663965, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.001986984907305974 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.2018458460713563, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018101367404328157 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.30371799193520876, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.003752209403616017 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.21475934437687166, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0022815651046262236 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.22766636955410458, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002095153498100042 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git 
a/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..72026d80ab1c54221c710e7787246c3793a99610 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 4.7806427294993, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.1310832353636796 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.4023615733786828, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0050997654220666 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.2604859328757677, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.003124610081613822 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.2720615139154822, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0027687783144970625 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.20471652877426683, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0053699474888414716 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.08914356908095893, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0017253335935271583 + }, + { + "task_name": 
"e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.09494958872823868, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0016238555093460345 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.324312305309799, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.005059911825690109 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.19325765142384435, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002374927639383697 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.20383717565040163, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.002104411024247141 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.35265257546958945, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.005084624220690631 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.21792408382120357, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002710638175002805 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.2287941438454128, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0024025488670924725 + } + ], + "config": { + "model": "hf-causal", + "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..d716943ef92a49891574eb8efdbfab6d9fb180ff --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 5.379158787794686, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.17424945572147568 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.4200250914129759, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0053484767449253675 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.2597005038015286, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0031776678746421685 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.2739605028454004, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0028626358837710963 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + 
"rouge2_precision": 0.2324625707379104, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.005656196308354222 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.09787654675007354, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.00178618058095944 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.10541398390697426, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0017164566544297254 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.3452442707098063, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.005326530824347986 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.19634967424253236, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002444802090072814 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.20932493009842154, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.002213566067169264 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3730608496140118, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0053445330283940886 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.21983351464384387, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0027657542997588506 + }, + { + 
"task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.23343576076206798, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0025054329416814864 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..182d54386438cffab223b0940da0f82dcbaba341 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 5.84362787810031, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.1914395641675779 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.44142915899786084, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.005252644410011952 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.27293406189445324, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.003231606561873733 + }, + { + "task_name": 
"e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.28912203831106437, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0028684342474448 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.24580187764860698, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.005582820891133567 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.10804855284127729, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0018386317913512528 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.11600779243298241, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0017448325124731262 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.36278920910440526, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.005259330590831443 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.20655546999122476, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0024658407342331543 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.22136695075479842, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.002213265282685222 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.39113354412345547, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + 
"rougeLsum_precision_stderr": 0.005259888245133616 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.23109513502236428, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0028128632311166086 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.24635463422951365, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002518259186545489 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..ec932546e1f7a6e08d2aac10e6d0b0fc5478df68 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 6.294657081635081, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.1191423611800128 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.45758123798499606, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + 
"rouge1_precision_stderr": 0.005027609714045855 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.2870117768804813, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0031304392663814677 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.30818385424543937, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002812315846665837 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.2503889688775632, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.00532362565564733 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.11655912503614715, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0018305152372365434 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.1273300920831121, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0017849338112142722 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.37623432283720026, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.005027601070585646 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.22058281726854637, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0024416343969262534 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.23900514298098396, + "dataset_path": 
"e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0022097628891594285 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.40434981140634907, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.005043898533403243 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.24370773833649576, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002739699465790519 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.2631784707358199, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002487054103176312 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_0.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..118b6f88bdce60fc9e7fa4c418bdbe760b768fe0 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.09869862910594125, + "dataset_path": "GEM/xsum", + "dataset_name": 
null, + "subset": "", + "rouge1_precision_stderr": 0.0015754263415516496 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.2425997427212717, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0035498618411088337 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.13822546470552896, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0020817271615335853 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.016787743878533688, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007005319097605839 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.0425780543002491, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0017220443443167939 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.023697282994221247, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0009611791067872703 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.08626521539341878, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012705285495719658 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.21327598743091675, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0029459890547047195 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.12098743849059979, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001680519826816699 + }, + { + "task_name": 
"gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.07499548048418175, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0011973587779807702 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.18673351936586377, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002877930418991892 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.1053525696001833, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.001600085618320443 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.7848159984641091, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.0846489540307912 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_1.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..8ace88c7c98fde2f3ba763ecdb17b01ea3950f89 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.09124145037680485, + "dataset_path": "GEM/xsum", + 
"dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0014659607841345699 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.22757570030500165, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0034586902105503583 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.12866565250951, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0019918374752407302 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.012967504798466119, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006125196007516605 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.03365509386476815, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0015985879929148613 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.018497303697898898, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0008688724913792904 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07973413334544867, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012126850079835258 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.1999503525963445, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0029346063767394658 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.11258738658901131, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001654534085681258 + }, + { + 
"task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.07040045782230749, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0011336127776379584 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.17735154114603943, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002793790877961365 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09952018094863402, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015543045742524805 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.6629456628355466, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.08774497677853796 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_2.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..08345d2f47996c37c404df1f5203bf9a07cfc7ff --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.09013053440861082, + "dataset_path": 
"GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0014220128061208542 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.2273610167464681, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0034008282640079156 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.1275054327542126, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.001936094417525703 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.013685249717276669, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006153358129211847 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.03651263650932684, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0016893088610047168 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.019662620850184154, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0008838457269276937 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.08023430784844289, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0011971788003166785 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.20334042051960866, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0029270688857344516 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.11364996341359528, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0016363233390032223 + 
}, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06885212236096724, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0010822260185504432 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.17558641489646157, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0027535532125502033 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09766604816138601, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.001494448373327452 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.6581479118355196, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.07454636170059836 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_3.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..3e878fd79635aa4ceeed0d67b6135429a63c6640 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08887731112301472, + 
"dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0016693801787118184 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.2137238957435145, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0035351437827021116 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.12223643758741752, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002043267213282596 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.01336910380993121, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006644057446671918 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.034200003110033475, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.001715535153457774 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.018803041332499517, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0009204376153200208 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07951277816840352, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0013813570731518456 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.1929746396810042, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0031195403408695475 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.10976787111848983, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 
0.001747164342136605 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.0692667028628289, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0012719926110399762 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.16841312507750444, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0028632604489444353 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09547379870346774, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015781898824613876 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.6487530237798568, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.07015562797768161 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_4.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..ec38e4f8e2f234c99297edb7e1e4d76de13635e5 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 
0.030778909853395358, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0020706771099415055 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.05446169618855107, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0031240949944244803 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.0352935136087583, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002067265021549025 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.004626594678435454, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0005432919775929613 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.008754579533738186, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0008926051691577739 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.005563844272859168, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0005736273252218394 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.026771846295171677, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.001757008435057672 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.04820099757783116, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002741549258464254 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.030796858749521736, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + 
"rougeL_fmeasure_stderr": 0.0017427639473783613 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.024799039714845435, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0017459449627102337 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.043284204930067245, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0025292545975519414 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.02795059198035774, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0016496415851520105 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.31677978361218245, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.11491101515831749 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_5.json b/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..abb601000c9e416cbd923409272058aed6310eb8 --- /dev/null +++ b/146m14b1b5/evaluation/generation/slim.146m14b1b5_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", 
+ "rouge1_precision": 0.0024889331628189042, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0006815397373640728 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.002181607723763231, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0006183413432850734 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.0022773127753522747, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.00063421295615357 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.0005040504428810351, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.00018325075012185258 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.0003980023860711985, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.00015801551672316093 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.00043251733980451243, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0001624532835097625 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.002154819197651723, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0005785407429483231 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.00188617310535414, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0005267417519795776 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.001966631793324211, + "dataset_path": "GEM/xsum", + 
"dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0005373577051134612 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.002154819197651723, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.000586342779903712 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.0019061538009282621, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0005449980000845078 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.0019792726415445738, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0005523755455290908 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 5.0129268480742525e-39, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 2.561058897904247e-33 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-146m-14b/146m14b1b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/rankeval/146m14b1b5_0.csv b/146m14b1b5/evaluation/rankeval/146m14b1b5_0.csv new file mode 100644 index 0000000000000000000000000000000000000000..bc8a5f010795b380cec7b778a94abd2c9f02092e --- /dev/null +++ b/146m14b1b5/evaluation/rankeval/146m14b1b5_0.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.326,0.014830507204541031,0 +anli_r2,acc,0.341,0.014998131348402699,0 
+anli_r3,acc,0.345,0.01372842153945488,0 +arc_challenge,acc,0.18344709897610922,0.011310170179554541,0 +arc_challenge,acc_norm,0.22184300341296928,0.012141659068147887,0 +arc_easy,acc,0.40446127946127947,0.010070746648278785,0 +arc_easy,acc_norm,0.3661616161616162,0.009885391390947728,0 +boolq,acc,0.5131498470948013,0.008742030090044975,1 +cb,acc,0.42857142857142855,0.06672848092813058,1 +cb,f1,0.3,,1 +copa,acc,0.59,0.04943110704237102,0 +hellaswag,acc,0.28271260705038836,0.004493975527386721,0 +hellaswag,acc_norm,0.2983469428400717,0.004565974937793719,0 +piqa,acc,0.6332970620239391,0.01124362501903826,0 +piqa,acc_norm,0.6240478781284005,0.011301098166895725,0 +rte,acc,0.5270758122743683,0.030052303463143706,0 +sciq,acc,0.678,0.01478291360099666,0 +sciq,acc_norm,0.601,0.015493193313162906,0 +storycloze_2016,acc,0.5799037947621593,0.011413833983106135,0 +winogrande,acc,0.5122336227308603,0.01404827882040562,0 diff --git a/146m14b1b5/evaluation/rankeval/146m14b1b5_0.json b/146m14b1b5/evaluation/rankeval/146m14b1b5_0.json new file mode 100644 index 0000000000000000000000000000000000000000..8d463db1b9560be5bfff759c8cd663f4ba93c1ea --- /dev/null +++ b/146m14b1b5/evaluation/rankeval/146m14b1b5_0.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.326, + "acc_stderr": 0.014830507204541031 + }, + "anli_r2": { + "acc": 0.341, + "acc_stderr": 0.014998131348402699 + }, + "anli_r3": { + "acc": 0.345, + "acc_stderr": 0.01372842153945488 + }, + "cb": { + "acc": 0.42857142857142855, + "acc_stderr": 0.06672848092813058, + "f1": 0.3 + }, + "copa": { + "acc": 0.59, + "acc_stderr": 0.04943110704237102 + }, + "hellaswag": { + "acc": 0.28271260705038836, + "acc_stderr": 0.004493975527386721, + "acc_norm": 0.2983469428400717, + "acc_norm_stderr": 0.004565974937793719 + }, + "rte": { + "acc": 0.5270758122743683, + "acc_stderr": 0.030052303463143706 + }, + "winogrande": { + "acc": 0.5122336227308603, + "acc_stderr": 0.01404827882040562 + }, + "storycloze_2016": { + "acc": 
0.5799037947621593, + "acc_stderr": 0.011413833983106135 + }, + "boolq": { + "acc": 0.5131498470948013, + "acc_stderr": 0.008742030090044975 + }, + "arc_easy": { + "acc": 0.40446127946127947, + "acc_stderr": 0.010070746648278785, + "acc_norm": 0.3661616161616162, + "acc_norm_stderr": 0.009885391390947728 + }, + "arc_challenge": { + "acc": 0.18344709897610922, + "acc_stderr": 0.011310170179554541, + "acc_norm": 0.22184300341296928, + "acc_norm_stderr": 0.012141659068147887 + }, + "sciq": { + "acc": 0.678, + "acc_stderr": 0.01478291360099666, + "acc_norm": 0.601, + "acc_norm_stderr": 0.015493193313162906 + }, + "piqa": { + "acc": 0.6332970620239391, + "acc_stderr": 0.01124362501903826, + "acc_norm": 0.6240478781284005, + "acc_norm_stderr": 0.011301098166895725 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/rankeval/146m14b1b5_1.csv b/146m14b1b5/evaluation/rankeval/146m14b1b5_1.csv new file mode 100644 index 0000000000000000000000000000000000000000..31f1e96af8fb9364f8453828f10a75c66450da05 --- /dev/null +++ b/146m14b1b5/evaluation/rankeval/146m14b1b5_1.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.334,0.014922019523732963,0 +anli_r2,acc,0.315,0.014696631960792503,0 +anli_r3,acc,0.33416666666666667,0.01362243481313678,0 +arc_challenge,acc,0.18600682593856654,0.011370940183266728,0 +arc_challenge,acc_norm,0.2167235494880546,0.012040156713481192,0 +arc_easy,acc,0.39057239057239057,0.010011059112064236,0 +arc_easy,acc_norm,0.3547979797979798,0.009817629113069697,0 +boolq,acc,0.4957186544342508,0.008744734378208071,1 +cb,acc,0.5535714285714286,0.06703189227942398,1 +cb,f1,0.38156331670230453,,1 +copa,acc,0.55,0.04999999999999999,0 +hellaswag,acc,0.2811192989444334,0.004486268470666331,0 
+hellaswag,acc_norm,0.29874526986656047,0.0045677248720572,0 +piqa,acc,0.6338411316648531,0.011240106070308453,0 +piqa,acc_norm,0.6284004352557128,0.011274603006724747,0 +rte,acc,0.5415162454873647,0.029992535385373314,0 +sciq,acc,0.691,0.014619600977206488,0 +sciq,acc_norm,0.634,0.015240612726405756,0 +storycloze_2016,acc,0.5617316942811331,0.011473969561488145,0 +winogrande,acc,0.489344909234412,0.014049294536290396,0 diff --git a/146m14b1b5/evaluation/rankeval/146m14b1b5_1.json b/146m14b1b5/evaluation/rankeval/146m14b1b5_1.json new file mode 100644 index 0000000000000000000000000000000000000000..15b26487489027b3e3ba90bd6657d8facc096caa --- /dev/null +++ b/146m14b1b5/evaluation/rankeval/146m14b1b5_1.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.334, + "acc_stderr": 0.014922019523732963 + }, + "anli_r2": { + "acc": 0.315, + "acc_stderr": 0.014696631960792503 + }, + "anli_r3": { + "acc": 0.33416666666666667, + "acc_stderr": 0.01362243481313678 + }, + "cb": { + "acc": 0.5535714285714286, + "acc_stderr": 0.06703189227942398, + "f1": 0.38156331670230453 + }, + "copa": { + "acc": 0.55, + "acc_stderr": 0.04999999999999999 + }, + "hellaswag": { + "acc": 0.2811192989444334, + "acc_stderr": 0.004486268470666331, + "acc_norm": 0.29874526986656047, + "acc_norm_stderr": 0.0045677248720572 + }, + "rte": { + "acc": 0.5415162454873647, + "acc_stderr": 0.029992535385373314 + }, + "winogrande": { + "acc": 0.489344909234412, + "acc_stderr": 0.014049294536290396 + }, + "storycloze_2016": { + "acc": 0.5617316942811331, + "acc_stderr": 0.011473969561488145 + }, + "boolq": { + "acc": 0.4957186544342508, + "acc_stderr": 0.008744734378208071 + }, + "arc_easy": { + "acc": 0.39057239057239057, + "acc_stderr": 0.010011059112064236, + "acc_norm": 0.3547979797979798, + "acc_norm_stderr": 0.009817629113069697 + }, + "arc_challenge": { + "acc": 0.18600682593856654, + "acc_stderr": 0.011370940183266728, + "acc_norm": 0.2167235494880546, + "acc_norm_stderr": 
0.012040156713481192 + }, + "sciq": { + "acc": 0.691, + "acc_stderr": 0.014619600977206488, + "acc_norm": 0.634, + "acc_norm_stderr": 0.015240612726405756 + }, + "piqa": { + "acc": 0.6338411316648531, + "acc_stderr": 0.011240106070308453, + "acc_norm": 0.6284004352557128, + "acc_norm_stderr": 0.011274603006724747 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/rankeval/146m14b1b5_2.csv b/146m14b1b5/evaluation/rankeval/146m14b1b5_2.csv new file mode 100644 index 0000000000000000000000000000000000000000..29466e5948c182e49df47c054cf47ef1fa36a84e --- /dev/null +++ b/146m14b1b5/evaluation/rankeval/146m14b1b5_2.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.321,0.014770821817934635,0 +anli_r2,acc,0.336,0.014944140233795021,0 +anli_r3,acc,0.3383333333333333,0.013664144006618275,0 +arc_challenge,acc,0.18088737201365188,0.011248574467407027,0 +arc_challenge,acc_norm,0.22184300341296928,0.01214165906814789,0 +arc_easy,acc,0.39225589225589225,0.010018744689650043,0 +arc_easy,acc_norm,0.3640572390572391,0.009873293392779117,0 +boolq,acc,0.48623853211009177,0.008741742106878654,1 +cb,acc,0.5,0.06741998624632421,1 +cb,f1,0.34486817325800373,,1 +copa,acc,0.54,0.05009082659620332,0 +hellaswag,acc,0.2813184624576778,0.004487235657955673,0 +hellaswag,acc_norm,0.2954590718980283,0.004553164013379556,0 +piqa,acc,0.6338411316648531,0.011240106070308455,0 +piqa,acc_norm,0.6256800870511425,0.01129127680119499,0 +rte,acc,0.5054151624548736,0.030094698123239966,0 +sciq,acc,0.686,0.014683991951087962,0 +sciq,acc_norm,0.637,0.015213890444671281,0 +storycloze_2016,acc,0.5665419561731694,0.011459581799087402,0 +winogrande,acc,0.4956590370955012,0.014051956064076892,0 diff --git 
a/146m14b1b5/evaluation/rankeval/146m14b1b5_2.json b/146m14b1b5/evaluation/rankeval/146m14b1b5_2.json new file mode 100644 index 0000000000000000000000000000000000000000..ca8e4cddb70495598c881356889900449c944148 --- /dev/null +++ b/146m14b1b5/evaluation/rankeval/146m14b1b5_2.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.321, + "acc_stderr": 0.014770821817934635 + }, + "anli_r2": { + "acc": 0.336, + "acc_stderr": 0.014944140233795021 + }, + "anli_r3": { + "acc": 0.3383333333333333, + "acc_stderr": 0.013664144006618275 + }, + "cb": { + "acc": 0.5, + "acc_stderr": 0.06741998624632421, + "f1": 0.34486817325800373 + }, + "copa": { + "acc": 0.54, + "acc_stderr": 0.05009082659620332 + }, + "hellaswag": { + "acc": 0.2813184624576778, + "acc_stderr": 0.004487235657955673, + "acc_norm": 0.2954590718980283, + "acc_norm_stderr": 0.004553164013379556 + }, + "rte": { + "acc": 0.5054151624548736, + "acc_stderr": 0.030094698123239966 + }, + "winogrande": { + "acc": 0.4956590370955012, + "acc_stderr": 0.014051956064076892 + }, + "storycloze_2016": { + "acc": 0.5665419561731694, + "acc_stderr": 0.011459581799087402 + }, + "boolq": { + "acc": 0.48623853211009177, + "acc_stderr": 0.008741742106878654 + }, + "arc_easy": { + "acc": 0.39225589225589225, + "acc_stderr": 0.010018744689650043, + "acc_norm": 0.3640572390572391, + "acc_norm_stderr": 0.009873293392779117 + }, + "arc_challenge": { + "acc": 0.18088737201365188, + "acc_stderr": 0.011248574467407027, + "acc_norm": 0.22184300341296928, + "acc_norm_stderr": 0.01214165906814789 + }, + "sciq": { + "acc": 0.686, + "acc_stderr": 0.014683991951087962, + "acc_norm": 0.637, + "acc_norm_stderr": 0.015213890444671281 + }, + "piqa": { + "acc": 0.6338411316648531, + "acc_stderr": 0.011240106070308455, + "acc_norm": 0.6256800870511425, + "acc_norm_stderr": 0.01129127680119499 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + 
"storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/rankeval/146m14b1b5_3.csv b/146m14b1b5/evaluation/rankeval/146m14b1b5_3.csv new file mode 100644 index 0000000000000000000000000000000000000000..bd8f5645bc213044030c72123c3c901686c08909 --- /dev/null +++ b/146m14b1b5/evaluation/rankeval/146m14b1b5_3.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.309,0.014619600977206491,0 +anli_r2,acc,0.343,0.015019206922356953,0 +anli_r3,acc,0.3333333333333333,0.013613950010225601,0 +arc_challenge,acc,0.19112627986348124,0.011490055292778592,0 +arc_challenge,acc_norm,0.2167235494880546,0.012040156713481189,0 +arc_easy,acc,0.3888888888888889,0.01000324833531376,0 +arc_easy,acc_norm,0.36237373737373735,0.009863468202583789,0 +boolq,acc,0.4938837920489297,0.00874440068189347,1 +cb,acc,0.35714285714285715,0.0646095738380922,1 +cb,f1,0.24743230625583568,,1 +copa,acc,0.57,0.04975698519562428,0 +hellaswag,acc,0.2835092611033659,0.004497803024345142,0 +hellaswag,acc_norm,0.2983469428400717,0.004565974937793714,0 +piqa,acc,0.6251360174102285,0.011294565805619017,0 +piqa,acc_norm,0.6218715995647442,0.011313980666854535,0 +rte,acc,0.516245487364621,0.030080573208738064,0 +sciq,acc,0.672,0.014853842487270334,0 +sciq,acc_norm,0.627,0.015300493622922814,0 +storycloze_2016,acc,0.5628006413682523,0.011470867061664471,0 +winogrande,acc,0.5011838989739542,0.014052446290529015,0 diff --git a/146m14b1b5/evaluation/rankeval/146m14b1b5_3.json b/146m14b1b5/evaluation/rankeval/146m14b1b5_3.json new file mode 100644 index 0000000000000000000000000000000000000000..88a465edde80b4f2100d357e65a8f9c372144e14 --- /dev/null +++ b/146m14b1b5/evaluation/rankeval/146m14b1b5_3.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.309, + "acc_stderr": 0.014619600977206491 + }, + "anli_r2": { + "acc": 0.343, + "acc_stderr": 0.015019206922356953 + }, + "anli_r3": { + 
"acc": 0.3333333333333333, + "acc_stderr": 0.013613950010225601 + }, + "cb": { + "acc": 0.35714285714285715, + "acc_stderr": 0.0646095738380922, + "f1": 0.24743230625583568 + }, + "copa": { + "acc": 0.57, + "acc_stderr": 0.04975698519562428 + }, + "hellaswag": { + "acc": 0.2835092611033659, + "acc_stderr": 0.004497803024345142, + "acc_norm": 0.2983469428400717, + "acc_norm_stderr": 0.004565974937793714 + }, + "rte": { + "acc": 0.516245487364621, + "acc_stderr": 0.030080573208738064 + }, + "winogrande": { + "acc": 0.5011838989739542, + "acc_stderr": 0.014052446290529015 + }, + "storycloze_2016": { + "acc": 0.5628006413682523, + "acc_stderr": 0.011470867061664471 + }, + "boolq": { + "acc": 0.4938837920489297, + "acc_stderr": 0.00874440068189347 + }, + "arc_easy": { + "acc": 0.3888888888888889, + "acc_stderr": 0.01000324833531376, + "acc_norm": 0.36237373737373735, + "acc_norm_stderr": 0.009863468202583789 + }, + "arc_challenge": { + "acc": 0.19112627986348124, + "acc_stderr": 0.011490055292778592, + "acc_norm": 0.2167235494880546, + "acc_norm_stderr": 0.012040156713481189 + }, + "sciq": { + "acc": 0.672, + "acc_stderr": 0.014853842487270334, + "acc_norm": 0.627, + "acc_norm_stderr": 0.015300493622922814 + }, + "piqa": { + "acc": 0.6251360174102285, + "acc_stderr": 0.011294565805619017, + "acc_norm": 0.6218715995647442, + "acc_norm_stderr": 0.011313980666854535 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/rankeval/146m14b1b5_4.csv b/146m14b1b5/evaluation/rankeval/146m14b1b5_4.csv new file mode 100644 index 0000000000000000000000000000000000000000..74704aaaabe94d7cde11398eadd615a03545fea7 --- /dev/null +++ b/146m14b1b5/evaluation/rankeval/146m14b1b5_4.csv @@ -0,0 +1,21 @@ 
+task,metric,value,err,version +anli_r1,acc,0.333,0.014910846164229873,0 +anli_r2,acc,0.351,0.015100563798316403,0 +anli_r3,acc,0.335,0.01363087184382148,0 +arc_challenge,acc,0.18771331058020477,0.01141100131415512,0 +arc_challenge,acc_norm,0.2158703071672355,0.01202297536003066,0 +arc_easy,acc,0.3918350168350168,0.010016835016834962,0 +arc_easy,acc_norm,0.3653198653198653,0.009880576614806928,0 +boolq,acc,0.4938837920489297,0.00874440068189348,1 +cb,acc,0.39285714285714285,0.0658538889806635,1 +cb,f1,0.27474323062558353,,1 +copa,acc,0.59,0.04943110704237102,0 +hellaswag,acc,0.28281218880701053,0.004494454911844635,0 +hellaswag,acc_norm,0.2951603266281617,0.004551826272978058,0 +piqa,acc,0.6381936887921654,0.011211397313020366,0 +piqa,acc_norm,0.6207834602829162,0.011320331012905074,0 +rte,acc,0.48375451263537905,0.030080573208738064,0 +sciq,acc,0.692,0.014606483127342761,0 +sciq,acc_norm,0.64,0.01518652793204012,0 +storycloze_2016,acc,0.564404061998931,0.011466111817562836,0 +winogrande,acc,0.49013417521704816,0.014049749833367592,0 diff --git a/146m14b1b5/evaluation/rankeval/146m14b1b5_4.json b/146m14b1b5/evaluation/rankeval/146m14b1b5_4.json new file mode 100644 index 0000000000000000000000000000000000000000..f6930cbc1669def170e45abdfeb646ae1b2efc4f --- /dev/null +++ b/146m14b1b5/evaluation/rankeval/146m14b1b5_4.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.333, + "acc_stderr": 0.014910846164229873 + }, + "anli_r2": { + "acc": 0.351, + "acc_stderr": 0.015100563798316403 + }, + "anli_r3": { + "acc": 0.335, + "acc_stderr": 0.01363087184382148 + }, + "cb": { + "acc": 0.39285714285714285, + "acc_stderr": 0.0658538889806635, + "f1": 0.27474323062558353 + }, + "copa": { + "acc": 0.59, + "acc_stderr": 0.04943110704237102 + }, + "hellaswag": { + "acc": 0.28281218880701053, + "acc_stderr": 0.004494454911844635, + "acc_norm": 0.2951603266281617, + "acc_norm_stderr": 0.004551826272978058 + }, + "rte": { + "acc": 0.48375451263537905, + "acc_stderr": 
0.030080573208738064 + }, + "winogrande": { + "acc": 0.49013417521704816, + "acc_stderr": 0.014049749833367592 + }, + "storycloze_2016": { + "acc": 0.564404061998931, + "acc_stderr": 0.011466111817562836 + }, + "boolq": { + "acc": 0.4938837920489297, + "acc_stderr": 0.00874440068189348 + }, + "arc_easy": { + "acc": 0.3918350168350168, + "acc_stderr": 0.010016835016834962, + "acc_norm": 0.3653198653198653, + "acc_norm_stderr": 0.009880576614806928 + }, + "arc_challenge": { + "acc": 0.18771331058020477, + "acc_stderr": 0.01141100131415512, + "acc_norm": 0.2158703071672355, + "acc_norm_stderr": 0.01202297536003066 + }, + "sciq": { + "acc": 0.692, + "acc_stderr": 0.014606483127342761, + "acc_norm": 0.64, + "acc_norm_stderr": 0.01518652793204012 + }, + "piqa": { + "acc": 0.6381936887921654, + "acc_stderr": 0.011211397313020366, + "acc_norm": 0.6207834602829162, + "acc_norm_stderr": 0.011320331012905074 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b1b5/evaluation/rankeval/146m14b1b5_5.csv b/146m14b1b5/evaluation/rankeval/146m14b1b5_5.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea7307150488fba3b223c74a53b596a7b881a57f --- /dev/null +++ b/146m14b1b5/evaluation/rankeval/146m14b1b5_5.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.337,0.01495508791865361,0 +anli_r2,acc,0.334,0.014922019523732967,0 +anli_r3,acc,0.3466666666666667,0.013744022550571958,0 +arc_challenge,acc,0.19027303754266212,0.0114704241792257,0 +arc_challenge,acc_norm,0.2090443686006826,0.011882746987406453,0 +arc_easy,acc,0.3952020202020202,0.010031894052790978,0 +arc_easy,acc_norm,0.3661616161616162,0.009885391390947726,0 +boolq,acc,0.4902140672782875,0.008743379884697191,1 
+cb,acc,0.48214285714285715,0.06737697508644648,1 +cb,f1,0.3363636363636364,,1 +copa,acc,0.57,0.04975698519562428,0 +hellaswag,acc,0.2824138617805218,0.004492535748097639,0 +hellaswag,acc_norm,0.30033857797251545,0.004574683373821047,0 +piqa,acc,0.6273122959738846,0.011281318332897741,0 +piqa,acc_norm,0.6213275299238302,0.011317163404516852,0 +rte,acc,0.48375451263537905,0.030080573208738064,0 +sciq,acc,0.69,0.014632638658632902,0 +sciq,acc_norm,0.635,0.015231776226264902,0 +storycloze_2016,acc,0.5611972207375735,0.011475500529062406,0 +winogrande,acc,0.47277032359905286,0.014031631629827701,0 diff --git a/146m14b1b5/evaluation/rankeval/146m14b1b5_5.json b/146m14b1b5/evaluation/rankeval/146m14b1b5_5.json new file mode 100644 index 0000000000000000000000000000000000000000..79521b71e51695ede0599e754e097920c7e7b15c --- /dev/null +++ b/146m14b1b5/evaluation/rankeval/146m14b1b5_5.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.337, + "acc_stderr": 0.01495508791865361 + }, + "anli_r2": { + "acc": 0.334, + "acc_stderr": 0.014922019523732967 + }, + "anli_r3": { + "acc": 0.3466666666666667, + "acc_stderr": 0.013744022550571958 + }, + "cb": { + "acc": 0.48214285714285715, + "acc_stderr": 0.06737697508644648, + "f1": 0.3363636363636364 + }, + "copa": { + "acc": 0.57, + "acc_stderr": 0.04975698519562428 + }, + "hellaswag": { + "acc": 0.2824138617805218, + "acc_stderr": 0.004492535748097639, + "acc_norm": 0.30033857797251545, + "acc_norm_stderr": 0.004574683373821047 + }, + "rte": { + "acc": 0.48375451263537905, + "acc_stderr": 0.030080573208738064 + }, + "winogrande": { + "acc": 0.47277032359905286, + "acc_stderr": 0.014031631629827701 + }, + "storycloze_2016": { + "acc": 0.5611972207375735, + "acc_stderr": 0.011475500529062406 + }, + "boolq": { + "acc": 0.4902140672782875, + "acc_stderr": 0.008743379884697191 + }, + "arc_easy": { + "acc": 0.3952020202020202, + "acc_stderr": 0.010031894052790978, + "acc_norm": 0.3661616161616162, + "acc_norm_stderr": 
0.009885391390947726 + }, + "arc_challenge": { + "acc": 0.19027303754266212, + "acc_stderr": 0.0114704241792257, + "acc_norm": 0.2090443686006826, + "acc_norm_stderr": 0.011882746987406453 + }, + "sciq": { + "acc": 0.69, + "acc_stderr": 0.014632638658632902, + "acc_norm": 0.635, + "acc_norm_stderr": 0.015231776226264902 + }, + "piqa": { + "acc": 0.6273122959738846, + "acc_stderr": 0.011281318332897741, + "acc_norm": 0.6213275299238302, + "acc_norm_stderr": 0.011317163404516852 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..61be5d72b01ae21f5a27c2d1ed3244d753dbc753 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394d3f2d994965a54356e53c628339c8b2079c89c6be760bb4d2f8f26973902a +size 27478295 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1f914cdd161e5be7ba2114b040ea1c4fa9398d2 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d88023c86233b3f4a5213502ad24472650eca64abd0a13387736f19da400ffed +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 
index 0000000000000000000000000000000000000000..f0dae60b7cec31cd88b847b894b2e21b79edc566 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e99b31760b06906e570f3731d848eb408e613b35ff36ee26f6c0142b36fb399d +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..af0a4a76a1ae05818cbb7fcf5c57c4db471d71f9 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45c05e61bd4b5ee5c207b25b6e7c3efe8a472ac77f4972155491cac20e154611 +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d01f0f666d62d1bb1453ebd6f4fa2fac2711a4a3 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd17f0eed23b7205f22fac6deb730554b744e81d512531cc9d6698ea1b06dd8 +size 27478178 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..34db967bd8da30e726c37ac5f440e51a00cbfa9b --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d62c731db5ea53697c9fb0955cd976ff8a46940625086ec10ae9db0dd53cb17 +size 27478370 diff --git 
a/146m14b1b5/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..42e85e5bffffd920a9db93c3ef35454358de8c2c --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c14eed8c488705d9712d6267b9e1726f93f17d36338b1ee92395552bb1896574 +size 27478178 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..79ffeccc484a3c3efa653b5b06703b1653e7deeb --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c34b46c80cd6963f7108d5577248bda7904ff8163ec6d46d368a506aaaf1224 +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea65fbb8da82d0b886e206e0eb5168e751a7efeb --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c8a7a678d3228e3aef9febdb46c248d3330fcf21372e1c31a2c3696d3cb70af +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4b8df082ced57943e12336f0a3c19acbc7b1e74 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:5da76e0cd5ad67f9ea2534467cd003e9cbfcd8bc3e91ed24ab32164ab8795b34 +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6997975527e1ad6a0e12e7bf277c6fd9a7d878cd --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31eac16094c14e2c2e11951d95ec6d5b3e3dac6843ee79ffe09d86c3b39fca43 +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc30d41d9a4c37845e64f35f630eba79440f1d38 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392823c5fbacc365efc71bc80517001b45465f1dac9769ecb1c95033730a3174 +size 27478231 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce6bc499dea1c9272836932603fb23c3518396b6 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adfb608f148a4941a3fa01385c0151bcbf13801ff68d8749d1861427023a78e7 +size 27478178 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..42a7dbd531261feb9663c2b54fef0ce23b1b4812 --- /dev/null +++ 
b/146m14b1b5/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf9528bdbdfa78782dbb3608fc51dfbe35734c6aadb96e62a882e17b14635b78 +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a367cdbb12f3e7f2276d97a37428274180c4d3ac --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5fd5ceb43bc2c05a96fc5534a3b7143916146596baf687548a3962c91fb4db2 +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e60bccab2a934250162ea3a3816ab7935ca71df9 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c5057652d1e4f642cdd0094929b8e06b9dcfce620cbeafd4da5632bbbc6ca23 +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b8beef62e73e01f765f1a505d31d89217d0194c --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c553b2344184d45379367f3105d43a9f0b90e015aa61944c02fbd7f38e5036b6 +size 27478178 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..cc270b62880c8d09cb4c69c50e717716a4c25da6 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb0e0957cff17bdb46e520475db86a60603723e97589ad02a40603b7114da040 +size 27478370 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..96ee00d793ae7b75de00571e34a2531220093e58 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:334b7a459f9757a43dd57dafa0dd4df39d4895bc100644d2479ee22d1993c131 +size 27478178 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..19e7cf840318d6e8293aff46db7d5d30e513c790 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8a2eda004c7784859c50ed795311a5650042a6b03dcc77a32288026e62d5e82 +size 27478370 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..253e7043561555ce9fa8d040bc6f5eb5cd6eb5a8 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c885b0e1e6477fc64b81b8f84630141fb5f5b4ae0987d239f53e62e913f19a8b +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt 
b/146m14b1b5/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9eba11dac74962aa629b73b0a0a10e717c0d0d4 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23b186d2fa9d0d8607dd0071fd187dbabe893bbf804a825d19bcfb148ce54017 +size 27478370 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d3636fd93c457035cb9484d62b232110c7e69d8 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7691726304d299c6e598deea1001180bb25c4c395886aa60d976f06df98590f +size 27478231 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc533dd43418d1590b4467d0e04b028b92b0a6aa --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3af5bfc375a8e7f143522b8713f93beac56754d3dfd4d10c9a836063ff7b4ce +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b23d33da0d17dbf9d244da468eb89b28ac9bec60 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29f585384d55dc6247c1a5a151f891559e02e922a0ce5ab94ed74ca73c333148 +size 
27478370 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d92584660cd3905a9f5d03787c1b7631bd1a204 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff89adf6c4019c8cb3534758fd920f37a1e8dc50658c45f62cb21117e75ddf3 +size 27478178 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08a91bd2917d311de333ca32b580967da405cb74 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380f9905f5d7a7b5a2568c38e6a70a59130d4cce191ec6caea3996c4925b2cbf +size 27478370 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..462fc18bd7b9a6f3569ee47d851d5b75fd55f67a --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dae006e8dc76dfff05710bf3c185d90ede5624e37fb34a687f21e456a2b1808 +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc54bad4cdf15f2ebec05e9781e452606033ab85 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:3827d6388bb52385d73c52972fc994624540b90650ded072192d9629ff76e69c +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a1948706f12c02e0f5cae67308122ab4a1846a3 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f54f488227a7ea66afbdc80c79ccf807a2e156417ce4110852f9340295399874 +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f73221c942ba8566dc12e87c2515dd960d24c52 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8accb3d3c6e20b5cf7a6af35e029355447e4ab8cec35f03f13c1563d867e3dd1 +size 27478114 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2171484708177763dd0fd2157cc9934dfd7ec78 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f90358424aa509321c40fbb041b7d8fb690f0f7ab02fde4865a4688dc8fa3683 +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8205a855898d5fe46070d598eab594a56694fafc --- /dev/null +++ 
b/146m14b1b5/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75db0e9b8036474abe6ecdf27b26551e1acccf45a7c09f9277d89c503d81280f +size 27478434 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..91d1248068936372e3f92e3cd03a8b1461d3aa18 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3725fe26bbb4d3674f6e8cd1037e4cecf6bacce7971ec667dca15bdd26eaf43 +size 27478167 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..534e3d98cae3d2ed02fb5d3eaaa36a7ba60e1fb0 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78ee21226733cafc13919abdf2085ecbd09c69e3712f619d88d24ce909aecb7a +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd338d133e0053c7653226cc6e4c88597fa3c5c3 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e83ffab55c65a01dcc3b6ad42b9156f915e41eb5e16287943a35d0adc98eab +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..9a61308e54dea8e5412a5e6c43f1bfb424487065 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c72cad62a6a9014dbdbc444652cc78f9bff1298d78716db26cdf407ee6b780a +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6111d68d279d86036cbec8fa3ab1e789bcb06a4c --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:604eae874d67575567e76d88339a257eab0c42af09ffb014cb08cc200e47f8d1 +size 27478178 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9f73bfcd865e86ee6303b3bdbe01c31e519fcae --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19fa874f596a7a83fc5559d27bc2412f9efb8ec767aec19daf9150fd2fab9c02 +size 27478434 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2550159b6f8074646da2abc1f9132985c914c1a5 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72f59da08bf378fff598f897eb414dd0d228792962f87df4919f202452baa33d +size 27478050 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt 
b/146m14b1b5/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5051fee6f963bb4de280f9ae04902eae451ac29 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dd3ec62d16199fb7cc1b03068e44775d8156c09249ae2661ed7c42e78e451cb +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8636cb2ea3c4b7f53fb2b598dd6f4d9e562b368 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7783e40ea2e4bbe43bc5c4a8433121121837e89dcc21151bc2f84c3464473cc +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0b518d8f74eec30416d56522f164698ef499b4a --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb5b3ac0d760c5ce0bc989b091a45239155b44a6681ccb533afb61c4972fa78 +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..38dea038dc615083c0016afb979d074ac8cb8fa3 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e167d17c00965ff17027d2d7340c7bc8a66b42f45ba4ac199f944507c2754c 
+size 27478370 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9085dfb3a5030a3779e20f62f805dad70ecc0c22 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ab9b4767faab13f51e6f491a2f7801a18ff40eef4d782ba8ef25ef2dab34b5 +size 27478231 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3146671391349e67615c3f1673c8f8883a5b9a77 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c42c681868eddcbc4241c8a5b69cee10a92570ab02e5479112b5af5fb5dc0d30 +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fc4431e559b398fde559984ad55d17a7875c431 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a1f3e7eb79f94e99c5be75b9ea201aa7b8b7fbd57effc9d5e5c3d06968746cf +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc87d42c6c91bfa958dd68ae1dd5e94d25373c82 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:7abe28bad4883bba505c123759b42c58479a0cfb9cf2f8ad8d061078eb0f31dc +size 27478434 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8221320e366d1042ff2cd130328c9477dc3be01c --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b58c28810f4ff4534ec05a3c20e169c9d14b4138e0a565063f42ba93af9c3528 +size 27478178 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9044cacfc1a9f3e5a6e4e62dd91f17bef816b2ec --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ca4de8f86e3520f9e6870b5b5a8fbcb46a1104105fa4c15b64fabceda266502 +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..80ad77060e4e12711ded95e98d28f63edf089ea5 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4483c98633ee8696cc1eb8d0f055b6583051cf17bb516620513ca008c19a2e55 +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b46442e7b8331eedb34b81715a65494c91520be2 --- /dev/null +++ 
b/146m14b1b5/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f79e31d1d1daad34341859a868c7d3d305ebd60a8d8a29276de6ed603071c13 +size 27478306 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae2a117f6f37910db62180946591b6d8ddecb324 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59949979cf9a5bcff06978747f59fbefdd0324ab2560067819a82a8ebcb59edf +size 27478370 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..da164cc92a397904df9292c4cd39204d837190ea --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efee079cd6777f4c2ff28b930b4d72fac8c74242f2c5b81013973b522371474b +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6435483599fe43c6381b0a9ed113cedd8eee1c3 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b62fae0a09bc490b984f1a19405bbcf3630ac54b72a5109ae1c153e62c53176 +size 27478370 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..2689cb658da69d81c6af7d726fcc4cefd88dadec --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b4061e21f6f4804ff1b344007cc27cdb815befa35bbb558852b134a1b8d76d0 +size 27478167 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b812a4b0e80ffe2ab63b1a451de7abc82333f858 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24c97b0b3170d7c95cb8c58d30ab0d75ea1f46751c7c4c92914e26c85795d8cb +size 27478178 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7bcde3fc24185764e08d0a900a803bbc94a54a2b --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a868b18c8fdc04345af1c2af50dfaf7eeed6437619cd66af7765e2357f597627 +size 27478370 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a817bbfb2797175a61bb5266c6aaf6d701ba8a65 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0982a0983c64ff2c765d6aa2d97d6c0954f3eb7dd9106a3b08fe002625adeaaf +size 27478178 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt 
b/146m14b1b5/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..966675ccbe62411852790b59102f5a836105e59b --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af706868ace2ab9b1c2e43932ad8df04f01a417eb5a409e21f2dab91160d573 +size 27478242 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2b7b4bf49cb61af9b7177edb726fc336755c4e3 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e3271241d198b29b43e24901ccfcf9bd851e6a001886f038cc7b0dc216e7ad +size 27478359 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb5da7c10e9a6d6e7fc0faf8b9bcbbc1f271f8e7 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a67eb59570a727a7b48771bbf8b2bcf9510cc028736dec1c8d3c92845c3136ad +size 27478103 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0b17cec1836ece7822d4be3786d289caea0ec0e --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39410495c9a8a58ae8b7b2afc022478def0b2ba3837791fe08e91bcfc912a0b5 +size 
27478359 diff --git a/146m14b1b5/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/146m14b1b5/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f863cbd99bb8fc713865192be63eef20f2e2ea5 --- /dev/null +++ b/146m14b1b5/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34e74d53f603560780dc9862276a0e8cb115455c50944dda5e7a4a403974d08b +size 27478167 diff --git a/146m14b1b5/global_step21553/layer_01-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_01-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d38c4ed49ef9e69b62ea4b9a0eab1013633c0aee --- /dev/null +++ b/146m14b1b5/global_step21553/layer_01-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2758d9769cb2d6783eef9f31565a765d7640c6ec507ae6ae8053218b53b9c696 +size 80413955 diff --git a/146m14b1b5/global_step21553/layer_03-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_03-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ad8de99f701fc1237036dbc4750a6a5a6dc26b2 --- /dev/null +++ b/146m14b1b5/global_step21553/layer_03-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31184e000766884801c9c49d2a4b2c080984ca767dbdfb4b6c7eedcf3be08dca +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_04-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_04-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dac7d7790559577ddc0e367424141e6bd71ef181 --- /dev/null +++ b/146m14b1b5/global_step21553/layer_04-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b1f139a35697cee80d1a796bb10feb475ad7ed28f5dce12e4daf8d9b6292c8 +size 14180099 diff --git 
a/146m14b1b5/global_step21553/layer_05-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_05-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc1b3fced4bf9637dfe25c5e0b8f31da94e8ec73 --- /dev/null +++ b/146m14b1b5/global_step21553/layer_05-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:107209101113c4d5e293cbb2eefaa17fec7e8f4dbe45d0992cc8f7d12e45bb6b +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_06-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_06-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f303dde6a730ec6de9383609ab50b44ca55d89bf --- /dev/null +++ b/146m14b1b5/global_step21553/layer_06-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8364e3991f2b3727548d402549ccad087451440e7879b326c5ea5991286e80ae +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_07-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_07-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0d6b797e8a650b212eec4dc28ee34abcb7322ec --- /dev/null +++ b/146m14b1b5/global_step21553/layer_07-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ddf46c61e383291af933670939856c2b716f64fd0d35b3ac69da4f7cd7d394 +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_08-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_08-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1dda4809b1a4d1ca0edd51a050bb728e52185d14 --- /dev/null +++ b/146m14b1b5/global_step21553/layer_08-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1de55d97dcee961db2606eeccc0f92909a6063a173fbe295577142824d683af6 +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_09-model_00-model_states.pt 
b/146m14b1b5/global_step21553/layer_09-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0d09d6820abc7365fb7e94de5e81847f6e3f883 --- /dev/null +++ b/146m14b1b5/global_step21553/layer_09-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d784338481bf5b32656e0bad16ba39e18dd925ac3ff6fcfb7d68630e0bbff2ad +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_10-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_10-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..971eb89bba93186d4eb68924ec99195840a0ca17 --- /dev/null +++ b/146m14b1b5/global_step21553/layer_10-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8adece28e0a546fa64f1aeb2d3300c5a51100b04c40db385e0b805c3c8daf8cf +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_11-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_11-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a22e33084eaa397e940f808c6d9dc43f51f4183d --- /dev/null +++ b/146m14b1b5/global_step21553/layer_11-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67aa7927c35e1059c4c1f92599014eb2343c73d5705aa45592a1a428104eea38 +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_12-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_12-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..343ed9544ad1c0a73abe81c97639d74c9e16567e --- /dev/null +++ b/146m14b1b5/global_step21553/layer_12-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5820ddc88a0f7be826d18b5b503972adcc0b3fb0491b4c540ff6faa2fa3bc3bd +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_13-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_13-model_00-model_states.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..986a679681e13856a695aa64a6a75274a8f973bc --- /dev/null +++ b/146m14b1b5/global_step21553/layer_13-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87b2a3a50e85a6d3c06cc7c68ef1bcdae5cbaec6653a8ccd7f7db2434cefb4d5 +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_14-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_14-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..783c8dca36351880f99053d79ea654027fc00753 --- /dev/null +++ b/146m14b1b5/global_step21553/layer_14-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4295608bc64c3088d7d30701ad8b5f751b78f1efe8f82b18d1e1537b53061a5 +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_15-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_15-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a3e1e833bd81a369d40e51097c52280abbf9616 --- /dev/null +++ b/146m14b1b5/global_step21553/layer_15-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52b86c07a777a9ea9b8d65e7e335b758b3a56ed26ca7b0a9f71b34776d5ba8aa +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_16-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_16-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..acc8792f8ec21330578e6f5753d5c8be592b3334 --- /dev/null +++ b/146m14b1b5/global_step21553/layer_16-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:745936df451247ec2b0d712540437ede53e35dcbf29c30e8588e538b7f091026 +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_17-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_17-model_00-model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..60ec39fc6a86be907e0810da687bc5072f680162 --- /dev/null +++ b/146m14b1b5/global_step21553/layer_17-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b22272e7c8ac2ae685710fc253d1aaef00a3ee9834c27926e6b28ffcb845ab1 +size 14180099 diff --git a/146m14b1b5/global_step21553/layer_19-model_00-model_states.pt b/146m14b1b5/global_step21553/layer_19-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a78a2695111a6262437f2dc02c3099097e18f1a --- /dev/null +++ b/146m14b1b5/global_step21553/layer_19-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bad13edd29ec79323f25011c737a10fc547df31199f00a0e9b7498bc8d29149 +size 4291 diff --git a/146m14b1b5/global_step21553/mp_rank_00_model_states.pt b/146m14b1b5/global_step21553/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e5474edcc2706c728f759d19ae284d302989756 --- /dev/null +++ b/146m14b1b5/global_step21553/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7735878ba3bd2c9702ad48c2973c1135077529e1e40dd639c38a346cef1ee35 +size 35443 diff --git a/146m14b1b5/logs/2820872.err b/146m14b1b5/logs/2820872.err new file mode 100644 index 0000000000000000000000000000000000000000..1a2bbf42d8647a23ef928a4a19964000f7fd00ff --- /dev/null +++ b/146m14b1b5/logs/2820872.err @@ -0,0 +1,1122 @@ +0: 2023-02-09 22:43:20.401744: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+0: 2023-02-09 22:43:20.401751: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:43:20.401744: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:43:20.401748: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:43:20.401756: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:43:20.401739: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+0: 2023-02-09 22:43:20.401754: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:43:20.401756: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:20.417979: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:20.417991: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:20.417997: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+6: 2023-02-09 22:43:20.417998: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:20.418007: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:20.418004: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:20.418005: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:43:20.418022: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+2: 2023-02-09 22:43:20.421653: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:43:20.421652: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:43:20.421648: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:43:20.421669: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:43:20.421677: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+2: 2023-02-09 22:43:20.421661: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:43:20.421667: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:43:20.421668: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:20.425741: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:20.425748: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+4: 2023-02-09 22:43:20.425753: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:20.425758: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:20.425769: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:20.425772: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:43:20.425780: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+4: 2023-02-09 22:43:20.425767: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:20.426529: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:20.426538: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:20.426534: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:20.426535: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+5: 2023-02-09 22:43:20.426545: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:20.426546: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:20.426529: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:43:20.426551: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:43:20.434474: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+1: 2023-02-09 22:43:20.434471: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:43:20.434477: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:43:20.434472: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:43:20.434473: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:43:20.434479: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+1: 2023-02-09 22:43:20.434468: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:43:20.434465: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:20.434897: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:20.434900: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:20.434905: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-02-09 22:43:20.434909: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:20.434915: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:20.434908: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:20.434899: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:43:20.434905: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+3: 2023-02-09 22:43:20.533727: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:43:20.533723: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:43:20.533741: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:43:20.533731: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:43:20.533736: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+3: 2023-02-09 22:43:20.533735: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:43:20.533723: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:43:20.533723: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+2: 2023-02-09 22:43:22.050782: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:22.050785: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:22.050788: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:22.050801: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:22.050794: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:22.050797: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:22.050803: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:22.050789: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:22.051185: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:22.051186: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:22.051190: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:22.051190: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:22.051193: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+2: 2023-02-09 22:43:22.051192: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:22.051194: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:22.051199: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:22.065823: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:22.065829: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:22.065832: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:22.065833: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:22.065834: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 2023-02-09 22:43:22.065947: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:22.065830: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 2023-02-09 22:43:22.065953: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:22.065841: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 2023-02-09 22:43:22.065954: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:22.065845: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 2023-02-09 22:43:22.065954: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:22.066235: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:22.066238: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+4: 2023-02-09 22:43:22.065963: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-09 22:43:22.066243: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:22.066243: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:22.066245: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:22.066247: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:22.066250: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:43:22.066251: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:22.065948: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:22.065966: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; 
dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:22.065959: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:22.066464: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:22.066469: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:22.066472: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:22.066477: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:22.066480: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+4: 2023-02-09 22:43:22.066483: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:22.066484: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:43:22.066486: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:22.074635: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:22.074637: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:22.074641: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:22.074643: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:22.074639: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:22.074648: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:22.074643: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:22.074646: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:22.075086: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:22.075092: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:22.075092: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:22.075096: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:22.075096: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-02-09 22:43:22.075097: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:22.075101: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:43:22.075105: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:22.076461: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:22.076470: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:22.076475: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:22.076470: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:22.076474: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:22.076477: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:22.076479: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:22.076481: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:22.076642: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:22.076649: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:22.076649: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:22.076650: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:22.076653: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+6: 2023-02-09 22:43:22.076657: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:22.076658: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:43:22.076659: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:22.080149: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:22.080157: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:22.080158: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:22.080154: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:22.080164: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:22.080159: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:22.080163: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:22.080169: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:22.080514: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:22.080516: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:22.080519: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:22.080521: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:22.080521: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+5: 2023-02-09 22:43:22.080520: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:22.080523: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:43:22.080526: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:22.142002: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:22.142017: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:22.142008: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:22.142019: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:22.142017: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:22.142023: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:22.142013: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:22.142009: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:22.142423: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:22.142428: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:22.142431: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:22.142431: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:22.142433: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+3: 2023-02-09 22:43:22.142435: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:22.142436: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:43:22.142440: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:22.192410: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:22.192416: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:22.192412: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:22.192418: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:22.192420: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:22.192426: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:22.192418: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:22.192418: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:22.192918: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:22.192920: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:22.192924: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:22.192923: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:22.192927: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+7: 2023-02-09 22:43:22.192927: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:22.192933: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:43:22.192938: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:43:31.776025: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.776048: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No 
such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.776063: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.776068: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.776080: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.776079: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.776095: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.776097: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.778467: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.778466: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.778475: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.778472: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.778474: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.778475: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.778477: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.778484: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.778483: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.778491: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.778492: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.778494: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.778496: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.778496: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:43:31.778498: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:43:31.778515: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:43:31.783799: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.783940: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or 
directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.783823: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.783955: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.783827: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.783965: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.783850: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.783980: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.783848: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.783986: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.783853: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.783998: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.783864: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.783999: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-09 22:43:31.783876: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.784003: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.784769: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-09 22:43:31.784898: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.784799: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-09 22:43:31.784915: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.784822: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-09 22:43:31.784928: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.784832: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-09 22:43:31.784943: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.784846: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-09 22:43:31.784949: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.784851: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-09 22:43:31.784953: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.784879: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-09 22:43:31.784954: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.784872: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-09 22:43:31.785000: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.786162: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-09 22:43:31.786232: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-09 22:43:31.786236: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: 
No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.786234: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 2023-02-09 22:43:31.786176: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: 2023-02-09 22:43:31.786253: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.786170: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.786231: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.786283: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.786173: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.786235: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.786284: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.786174: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.786234: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.786278: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.786177: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.786233: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.786290: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.786174: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.786239: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.786295: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.786176: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.786243: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.786300: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.786249: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.786249: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+3: 2023-02-09 22:43:31.786182: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:43:31.786255: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.786254: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.786256: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:43:31.786194: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:43:31.786196: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.786256: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.786258: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:43:31.786259: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:43:31.786198: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:43:31.786199: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:43:31.786201: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:43:31.786202: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:43:31.786204: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+7: 2023-02-09 22:43:31.786892: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.786911: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.786935: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.786944: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.786949: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.786967: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.786974: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 2023-02-09 22:43:31.787311: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.786996: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.787313: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:43:31.787314: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.787315: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.787316: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; 
LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:43:31.787315: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.787321: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:43:31.787317: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.787320: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:43:31.787319: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.787330: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787429: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-09 22:43:31.787324: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:43:31.787321: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.787326: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:43:31.787320: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:43:31.787329: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.787329: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.787335: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.787334: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.787336: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.787337: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: 2023-02-09 22:43:31.787456: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-09 22:43:31.787328: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-09 22:43:31.787337: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.787341: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:43:31.787341: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787460: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-09 22:43:31.787329: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.787336: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:43:31.787338: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: 2023-02-09 22:43:31.787471: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-09 22:43:31.787345: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:43:31.787343: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:43:31.787348: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:43:31.787349: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:43:31.787475: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787485: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787488: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.787487: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788492: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788493: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788496: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788500: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788509: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: 2023-02-09 22:43:31.788503: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788509: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788511: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788517: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788519: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788521: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788560: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:43:31.788563: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.789102: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.789101: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.789105: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.789107: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.789105: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.789109: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.789112: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.789118: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:43:31.789121: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:43:31.789122: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:43:31.789124: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:43:31.789127: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:43:31.789125: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:43:31.789129: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:43:31.789156: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:43:31.789172: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:43:31.789579: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789580: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789584: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789583: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789588: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789590: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789590: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789596: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:43:31.789597: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:43:31.789601: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:43:31.789606: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:43:31.789608: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:43:31.789608: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:43:31.789609: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:43:31.789644: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:43:31.789662: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788578: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:43:31.788581: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_upper_triang_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_upper_triang_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module fused_mix_prec_layer_norm_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module fused_mix_prec_layer_norm_cuda... +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +2: Successfully preprocessed all matching files. +2: Successfully preprocessed all matching files. +2: Successfully preprocessed all matching files. +1: Successfully preprocessed all matching files. +5: Successfully preprocessed all matching files. 
+2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +2: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +2: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +3: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +4: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +1: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+4: +4: +4: +4: +4: +4: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: +3: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: +6: +6: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+7: +7: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Loading extension module utils... +0: Loading extension module utils...Loading extension module utils... +0: +0: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +0: Building extension module utils... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... 
+2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +2: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +7: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +7: Loading extension module utils... +6: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... 
+7: Loading extension module utils... +7: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +0: +0: Loading extension module utils...Loading extension module utils... +0: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... 
+2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +1: +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +1: No modifications detected for re-loaded extension module utils, skipping build step... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... 
+2: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +2: +2: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +2: +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +2: +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... 
+5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +5: +5: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +5: +5: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +5: +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+6: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... 
+6: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +6: No modifications detected for re-loaded extension module utils, skipping build step... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +6: Loading extension module utils... +4: +4: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +4: +4: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +3: +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +6: +6: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +4: Loading extension module utils... 
+6: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +6: +6: Loading extension module utils...Loading extension module utils... +6: +3: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +3: +3: Loading extension module utils...Loading extension module utils... +3: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... 
+7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +7: +7: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings +0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/146m14b1b5/logs/2820872.out b/146m14b1b5/logs/2820872.out new file mode 100644 index 0000000000000000000000000000000000000000..4fa5dc5a56b85aa873d418d5e6cf442619236938 --- /dev/null +++ b/146m14b1b5/logs/2820872.out @@ -0,0 +1,5664 @@ +Model parameters: d_model 768 ffw_size 3072 kv_size 64 n_heads 12 n_layers 15 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 1 --pipeline-model-parallel-size 1 --num-layers 15 --hidden-size 768 --num-attention-heads 12 --kv-channels 64 --ffn-hidden-size 3072 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 4 --global-batch-size 256 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --clip-grad 1.0 --kill-switch-path kill-switch-146m14b1b5val --bf16 --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 --lr-warmup-samples 0 --clip-grad 1.0 --weight-decay 1e-1 
--no-load-optim --reset-progress --override-lr-scheduler --log-interval 10 --save-interval 1000 --eval-interval 1 --eval-iters 100 --eval-only true --tensorboard-dir tensorboard_146m14b1b5val --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save checkpoints_146m14b1b5 --load checkpoints_146m14b1b5 --train-weighted-split-paths-path train14b.txt --valid-weighted-split-paths-path val.txt --data-impl mmap --deepspeed --deepspeed_config ds_configs/2820872.json --zero-stage 0 +START 2820872: Thu 09 Feb 2023 10:42:59 PM EET +0: +0: +0: ======================= ROCm System Management Interface ======================= +0: ================================= Concise Info ================================= +0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +0: 0 44.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 2 45.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 3 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 4 41.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 6 45.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: ================================================================================ +0: ============================= End of ROCm SMI Log ============================== +2: +2: +2: ======================= ROCm System Management Interface ======================= +2: ================================= Concise Info ================================= +2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +2: 0 46.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 2 43.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 4 46.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 6 44.0c 96.0W 800Mhz 1600Mhz 
0% auto 560.0W 0% 0% +2: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: ================================================================================ +2: ============================= End of ROCm SMI Log ============================== +6: +6: +6: ======================= ROCm System Management Interface ======================= +6: ================================= Concise Info ================================= +6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +6: 0 43.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 2 45.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 4 43.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 6 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: ================================================================================ +6: ============================= End of ROCm SMI Log ============================== +4: +4: +4: ======================= ROCm System Management Interface ======================= +4: ================================= Concise Info ================================= +4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +4: 0 46.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 2 40.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 3 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 4 42.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 6 40.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: ================================================================================ +4: ============================= End of ROCm SMI Log ============================== +7: +7: +7: ======================= ROCm System Management Interface ======================= +7: 
================================= Concise Info ================================= +7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +7: 0 46.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 2 44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 4 47.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 6 44.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: ================================================================================ +7: ============================= End of ROCm SMI Log ============================== +5: +5: +5: ======================= ROCm System Management Interface ======================= +5: ================================= Concise Info ================================= +5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +5: 0 47.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 1 51.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 2 40.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 4 47.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 6 46.0c 81.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: ================================================================================ +5: ============================= End of ROCm SMI Log ============================== +3: +3: +3: ======================= ROCm System Management Interface ======================= +3: ================================= Concise Info ================================= +3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +3: 0 47.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 2 42.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 4 43.0c 
87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 5 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 6 41.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: ================================================================================ +3: ============================= End of ROCm SMI Log ============================== +1: +1: +1: ======================= ROCm System Management Interface ======================= +1: ================================= Concise Info ================================= +1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +1: 0 46.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 2 40.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 4 45.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 6 47.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: ================================================================================ +1: ============================= End of ROCm SMI Log ============================== +5: Launching on nid007043 (5/8), master nid007038 port 9999, GPUs 8, CUDA: True +2: Launching on nid007040 (2/8), master nid007038 port 9999, GPUs 8, CUDA: True +4: Launching on nid007042 (4/8), master nid007038 port 9999, GPUs 8, CUDA: True +7: Launching on nid007045 (7/8), master nid007038 port 9999, GPUs 8, CUDA: True +0: Launching on nid007038 (0/8), master nid007038 port 9999, GPUs 8, CUDA: True +6: Launching on nid007044 (6/8), master nid007038 port 9999, GPUs 8, CUDA: True +1: Launching on nid007039 (1/8), master nid007038 port 9999, GPUs 8, CUDA: True +3: Launching on nid007041 (3/8), master nid007038 port 9999, GPUs 8, CUDA: True +0: using world size: 64, data-parallel-size: 64, tensor-model-parallel size: 1, pipeline-model-parallel size: 1 +0: accumulate and all-reduce gradients in fp32 for 
bfloat16 data type. +0: using torch.bfloat16 for parameters ... +0: ------------------------ arguments ------------------------ +0: abort_on_unmet_fused_kernel_constraints ......... False +0: accumulate_allreduce_grads_in_fp32 .............. True +0: adam_beta1 ...................................... 0.9 +0: adam_beta2 ...................................... 0.999 +0: adam_eps ........................................ 1e-08 +0: adlr_autoresume ................................. False +0: adlr_autoresume_interval ........................ 1000 +0: apply_query_key_layer_scaling ................... True +0: apply_residual_connection_post_layernorm ........ False +0: attention_dropout ............................... 0.1 +0: attention_softmax_in_fp32 ....................... False +0: bert_binary_head ................................ True +0: bert_load ....................................... None +0: bf16 ............................................ True +0: bias_dropout_fusion ............................. True +0: bias_gelu_fusion ................................ True +0: biencoder_projection_dim ........................ 0 +0: biencoder_shared_query_context_model ............ False +0: block_data_path ................................. None +0: checkpoint_activations .......................... False +0: checkpoint_in_cpu ............................... False +0: checkpoint_num_layers ........................... 1 +0: clip_grad ....................................... 1.0 +0: codecarbon_dir .................................. None +0: consumed_train_samples .......................... 0 +0: consumed_train_tokens ........................... 0 +0: consumed_valid_samples .......................... 0 +0: contigious_checkpointing ........................ False +0: cpu_optimizer ................................... False +0: cpu_torch_adam .................................. False +0: curriculum_learning ............................. 
False +0: data_impl ....................................... mmap +0: data_parallel_size .............................. 64 +0: data_path ....................................... None +0: dataloader_type ................................. single +0: DDP_impl ........................................ local +0: decoder_seq_length .............................. None +0: deepscale ....................................... False +0: deepscale_config ................................ None +0: deepspeed ....................................... True +0: deepspeed_activation_checkpointing .............. False +0: deepspeed_config ................................ ds_configs/2820872.json +0: deepspeed_mpi ................................... False +0: distribute_checkpointed_activations ............. False +0: distributed_backend ............................. nccl +0: embed_layernorm ................................. False +0: embedding_path .................................. None +0: encoder_seq_length .............................. 2048 +0: eod_mask_loss ................................... False +0: eval_interval ................................... 1 +0: eval_iters ...................................... 100 +0: eval_only ....................................... True +0: evidence_data_path .............................. None +0: exit_duration_in_mins ........................... None +0: exit_interval ................................... None +0: ffn_hidden_size ................................. 3072 +0: finetune ........................................ False +0: fp16 ............................................ False +0: fp16_lm_cross_entropy ........................... False +0: fp32_residual_connection ........................ False +0: gigaflos_no_embeds .............................. 0 +0: global_batch_size ............................... 256 +0: glu_activation .................................. None +0: hidden_dropout .................................. 
0.1 +0: hidden_size ..................................... 768 +0: hysteresis ...................................... 2 +0: ict_head_size ................................... None +0: ict_load ........................................ None +0: img_dim ......................................... 224 +0: indexer_batch_size .............................. 128 +0: indexer_log_interval ............................ 1000 +0: inference ....................................... False +0: init_method_std ................................. 0.02 +0: init_method_xavier_uniform ...................... False +0: initial_loss_scale .............................. 4294967296 +0: kill_switch_path ................................ kill-switch-146m14b1b5val +0: kv_channels ..................................... 64 +0: layer_norm_fusion ............................... True +0: layernorm_epsilon ............................... 1e-05 +0: lazy_mpu_init ................................... None +0: load ............................................ checkpoints_146m14b1b5 +0: local_rank ...................................... None +0: log_batch_size_to_tensorboard ................... True +0: log_interval .................................... 10 +0: log_learning_rate_to_tensorboard ................ True +0: log_level ....................................... None +0: log_level_replica ............................... None +0: log_loss_scale_to_tensorboard ................... True +0: log_num_zeros_in_grad ........................... False +0: log_params_norm ................................. False +0: log_path ........................................ None +0: log_timers_to_tensorboard ....................... True +0: log_validation_ppl_to_tensorboard ............... True +0: loss_on_targets_only ............................ False +0: loss_scale ...................................... None +0: loss_scale_window ............................... 1000 +0: lr .............................................. 
0.0002 +0: lr_decay_iters .................................. None +0: lr_decay_samples ................................ 1 +0: lr_decay_style .................................. cosine +0: lr_decay_tokens ................................. None +0: lr_warmup_fraction .............................. None +0: lr_warmup_iters ................................. 0 +0: lr_warmup_samples ............................... 0 +0: make_vocab_size_divisible_by .................... 128 +0: mask_prob ....................................... 0.15 +0: masked_softmax_fusion ........................... True +0: max_position_embeddings ......................... 2048 +0: mean_noise_span_length .......................... None +0: memory_centric_tiled_linear ..................... False +0: merge_file ...................................... gpt2/merges.txt +0: micro_batch_size ................................ 4 +0: min_loss_scale .................................. 1.0 +0: min_lr .......................................... 2e-05 +0: mmap_warmup ..................................... False +0: no_load_optim ................................... True +0: no_load_rng ..................................... None +0: no_save_optim ................................... None +0: no_save_rng ..................................... None +0: noise_density ................................... None +0: num_attention_heads ............................. 12 +0: num_channels .................................... 3 +0: num_classes ..................................... 1000 +0: num_layers ...................................... 15 +0: num_layers_per_virtual_pipeline_stage ........... None +0: num_workers ..................................... 2 +0: onnx_safe ....................................... None +0: openai_gelu ..................................... False +0: optimizer ....................................... adam +0: optimizer_fusion ................................ 
True +0: override_lr_scheduler ........................... True +0: pad_vocab_size_to ............................... None +0: params_dtype .................................... torch.bfloat16 +0: partition_activations ........................... False +0: patch_dim ....................................... 16 +0: pipeline_model_parallel_size .................... 1 +0: position_embedding_type ......................... PositionEmbeddingType.absolute +0: pp_partition_method ............................. None +0: profile_backward ................................ False +0: query_in_block_prob ............................. 0.1 +0: rampup_batch_size ............................... None +0: rank ............................................ 0 +0: remote_device ................................... none +0: reset_attention_mask ............................ False +0: reset_position_ids .............................. False +0: reset_progress .................................. True +0: retriever_report_topk_accuracies ................ [] +0: retriever_score_scaling ......................... False +0: retriever_seq_length ............................ 256 +0: reweight_loss_based_on_position_frequency ....... False +0: sample_rate ..................................... 1.0 +0: save ............................................ checkpoints_146m14b1b5 +0: save_interval ................................... 1000 +0: scatter_gather_tensors_in_pipeline .............. True +0: scattered_embeddings ............................ False +0: seed ............................................ 1234 +0: seq_length ...................................... 2048 +0: sgd_momentum .................................... 0.9 +0: short_seq_prob .................................. 0.1 +0: skip_train_iteration_range ...................... None +0: split ........................................... None +0: split_transformers .............................. False +0: sync_tp_duplicated_parameters ................... 
False +0: synchronize_each_layer .......................... False +0: tensor_model_parallel_size ...................... 1 +0: tensorboard_dir ................................. tensorboard_146m14b1b5val +0: tensorboard_log_interval ........................ 1 +0: tensorboard_queue_size .......................... 5 +0: test_weighted_split_paths ....................... None +0: test_weighted_split_paths_path .................. None +0: tile_factor ..................................... 1 +0: titles_data_path ................................ None +0: tokenizer_name_or_path .......................... None +0: tokenizer_type .................................. GPT2BPETokenizer +0: train_iters ..................................... None +0: train_samples ................................... 1 +0: train_tokens .................................... None +0: train_weighted_split_names ...................... ['train'] +0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document']] +0: train_weighted_split_paths_path ................. None +0: train_weighted_split_splits ..................... [['0:1']] +0: train_weighted_split_weights .................... [['1.0']] +0: universal_checkpoint ............................ False +0: use_bnb_optimizer ............................... False +0: use_checkpoint_lr_scheduler ..................... False +0: use_contiguous_buffers_in_ddp ................... True +0: use_cpu_initialization .......................... None +0: use_one_sent_docs ............................... False +0: use_pin_memory .................................. False +0: valid_num_workers ............................... 2 +0: valid_weighted_split_names ...................... ['validation'] +0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] +0: valid_weighted_split_paths_path ................. 
None +0: valid_weighted_split_splits ..................... [['0:1']] +0: valid_weighted_split_weights .................... [['1.0']] +0: virtual_pipeline_model_parallel_size ............ None +0: vocab_extra_ids ................................. 0 +0: vocab_file ...................................... gpt2/vocab.json +0: weight_decay .................................... 0.1 +0: world_size ...................................... 64 +0: zero_allgather_bucket_size ...................... 0.0 +0: zero_contigious_gradients ....................... False +0: zero_reduce_bucket_size ......................... 0.0 +0: zero_reduce_scatter ............................. False +0: zero_stage ...................................... 0 +0: -------------------- end of arguments --------------------- +0: setting number of micro-batches to constant 1 +0: > building GPT2BPETokenizer tokenizer ... +0: > padded vocab (size: 50257) with 47 dummy tokens (new size: 50304) +0: DeepSpeed general environment info: +0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] +0: torch version .................... 1.13.0+rocm5.2 +0: torch cuda version ............... None +0: torch hip version ................ 5.2.21151-afdc89f8 +0: nvcc version ..................... None +0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] +0: deepspeed info ................... 0.7.5, unknown, unknown +0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 +0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** +0: > initializing torch distributed ... +0: [2023-02-09 22:44:23,289] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +7: > setting tensorboard ... 
+0: > initializing tensor model parallel with size 1 +0: > initializing pipeline model parallel with size 1 +0: > setting random seeds to 1234 ... +0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 +0: > compiling dataset index builder ... +0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: make: Nothing to be done for 'default'. +0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: >>> done with dataset index builder. Compilation time: 0.120 seconds +0: > compiling and loading fused kernels ... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 87 +0: ninja: no work to do. 
+0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 63 +0: ninja: no work to do. 
+0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 67 +0: ninja: no work to do. 
+0: >>> done with compiling and loading fused kernels. Compilation time: 26.484 seconds +0: time to initialize megatron (seconds): 68.610 +0: [after megatron is initialized] datetime: 2023-02-09 22:44:52 +0: building GPT model ... +0: [2023-02-09 22:44:52,743] [INFO] [utils.py:827:see_memory_usage] Before Building Model +0: [2023-02-09 22:44:52,744] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB +0: [2023-02-09 22:44:52,744] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.14 GB, percent = 6.0% +0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None +0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=1, model=0): 1, ProcessCoord(pipe=0, data=2, model=0): 2, ProcessCoord(pipe=0, data=3, model=0): 3, ProcessCoord(pipe=0, data=4, model=0): 4, ProcessCoord(pipe=0, data=5, model=0): 5, ProcessCoord(pipe=0, data=6, model=0): 6, ProcessCoord(pipe=0, data=7, model=0): 7, ProcessCoord(pipe=0, data=8, model=0): 8, ProcessCoord(pipe=0, data=9, model=0): 9, ProcessCoord(pipe=0, data=10, model=0): 10, ProcessCoord(pipe=0, data=11, model=0): 11, ProcessCoord(pipe=0, data=12, model=0): 12, ProcessCoord(pipe=0, data=13, model=0): 13, ProcessCoord(pipe=0, data=14, model=0): 14, ProcessCoord(pipe=0, data=15, model=0): 15, ProcessCoord(pipe=0, data=16, model=0): 16, ProcessCoord(pipe=0, data=17, model=0): 17, ProcessCoord(pipe=0, data=18, model=0): 18, ProcessCoord(pipe=0, data=19, model=0): 19, ProcessCoord(pipe=0, data=20, model=0): 20, ProcessCoord(pipe=0, data=21, model=0): 21, ProcessCoord(pipe=0, data=22, model=0): 22, ProcessCoord(pi +0: pe=0, data=23, model=0): 23, ProcessCoord(pipe=0, data=24, model=0): 24, ProcessCoord(pipe=0, data=25, model=0): 25, ProcessCoord(pipe=0, data=26, model=0): 26, ProcessCoord(pipe=0, data=27, model=0): 27, ProcessCoord(pipe=0, data=28, model=0): 28, ProcessCoord(pipe=0, data=29, model=0): 29, ProcessCoord(pipe=0, data=30, model=0): 30, ProcessCoord(pipe=0, 
data=31, model=0): 31, ProcessCoord(pipe=0, data=32, model=0): 32, ProcessCoord(pipe=0, data=33, model=0): 33, ProcessCoord(pipe=0, data=34, model=0): 34, ProcessCoord(pipe=0, data=35, model=0): 35, ProcessCoord(pipe=0, data=36, model=0): 36, ProcessCoord(pipe=0, data=37, model=0): 37, ProcessCoord(pipe=0, data=38, model=0): 38, ProcessCoord(pipe=0, data=39, model=0): 39, ProcessCoord(pipe=0, data=40, model=0): 40, ProcessCoord(pipe=0, data=41, model=0): 41, ProcessCoord(pipe=0, data=42, model=0): 42, ProcessCoord(pipe=0, data=43, model=0): 43, ProcessCoord(pipe=0, data=44, model=0): 44, ProcessCoord(pipe=0, data=45, model=0): 45, ProcessCoord(pipe=0, data=4 +0: 6, model=0): 46, ProcessCoord(pipe=0, data=47, model=0): 47, ProcessCoord(pipe=0, data=48, model=0): 48, ProcessCoord(pipe=0, data=49, model=0): 49, ProcessCoord(pipe=0, data=50, model=0): 50, ProcessCoord(pipe=0, data=51, model=0): 51, ProcessCoord(pipe=0, data=52, model=0): 52, ProcessCoord(pipe=0, data=53, model=0): 53, ProcessCoord(pipe=0, data=54, model=0): 54, ProcessCoord(pipe=0, data=55, model=0): 55, ProcessCoord(pipe=0, data=56, model=0): 56, ProcessCoord(pipe=0, data=57, model=0): 57, ProcessCoord(pipe=0, data=58, model=0): 58, ProcessCoord(pipe=0, data=59, model=0): 59, ProcessCoord(pipe=0, data=60, model=0): 60, ProcessCoord(pipe=0, data=61, model=0): 61, ProcessCoord(pipe=0, data=62, model=0): 62, ProcessCoord(pipe=0, data=63, model=0): 63} +0: [2023-02-09 22:44:54,756] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer +0: stage=0 layers=22 +0: 0: _to_float16 +0: 1: EmbeddingPipe +0: 2: +0: 3: ParallelTransformerLayerPipe +0: 4: ParallelTransformerLayerPipe +0: 5: ParallelTransformerLayerPipe +0: 6: ParallelTransformerLayerPipe +0: 7: ParallelTransformerLayerPipe +0: 8: ParallelTransformerLayerPipe +0: 9: ParallelTransformerLayerPipe +0: 10: ParallelTransformerLayerPipe +0: 11: ParallelTransformerLayerPipe +0: 12: ParallelTransformerLayerPipe 
+0: 13: ParallelTransformerLayerPipe +0: 14: ParallelTransformerLayerPipe +0: 15: ParallelTransformerLayerPipe +0: 16: ParallelTransformerLayerPipe +0: 17: ParallelTransformerLayerPipe +0: 18: undo +0: 19: MixedFusedLayerNorm +0: 20: EmbeddingPipe +0: 21: float16_to_fp32 +0: loss: CrossEntropy +0: [2023-02-09 22:44:55,077] [INFO] [utils.py:827:see_memory_usage] After Building Model +0: [2023-02-09 22:44:55,077] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.28 GB CA 0.29 GB Max_CA 0 GB +0: [2023-02-09 22:44:55,077] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.17 GB, percent = 6.0% +0: setting training iterations to 0 +0: > learning rate decay style: cosine +0: DeepSpeed is enabled. +0: [2023-02-09 22:44:55,079] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +0: [2023-02-09 22:45:08,130] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +0: [2023-02-09 22:45:08,131] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +0: [2023-02-09 22:45:08,131] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +0: [2023-02-09 22:45:08,142] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +0: [2023-02-09 22:45:08,142] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +0: [2023-02-09 22:45:08,266] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer +0: [2023-02-09 22:45:08,267] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.29 GB CA 0.31 GB Max_CA 0 GB +0: [2023-02-09 22:45:08,267] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.84 GB, percent = 6.1% +0: Time to load utils op: 0.1091611385345459 seconds +0: Time to load utils op: 0.10805487632751465 seconds +0: Time to load utils op: 0.10905790328979492 seconds +0: Time to load utils op: 0.0004870891571044922 seconds +0: Time to load 
utils op: 0.0004818439483642578 seconds +0: Time to load utils op: 0.0005562305450439453 seconds +0: ninja: no work to do. +0: Time to load utils op: 0.15857386589050293 seconds +0: Time to load utils op: 0.3031327724456787 secondsTime to load utils op: 0.3034231662750244 seconds +0: +0: Time to load utils op: 0.303433895111084 seconds +0: Time to load utils op: 0.30272674560546875 seconds +2: Time to load utils op: 0.31148433685302734 secondsTime to load utils op: 0.3112001419067383 seconds +2: +2: Time to load utils op: 0.31104564666748047 seconds +2: Time to load utils op: 0.31084775924682617 secondsTime to load utils op: 0.31094932556152344 seconds +2: +2: Time to load utils op: 0.3111588954925537 seconds +2: Time to load utils op: 0.31092286109924316 seconds +2: Time to load utils op: 0.31130194664001465 seconds +1: Time to load utils op: 0.3118927478790283 seconds +1: Time to load utils op: 0.3118009567260742 secondsTime to load utils op: 0.3118932247161865 seconds +1: +1: Time to load utils op: 0.3119065761566162 secondsTime to load utils op: 0.3120427131652832 seconds +1: +1: Time to load utils op: 0.3119232654571533 secondsTime to load utils op: 0.3119699954986572 seconds +1: +1: Time to load utils op: 0.3119349479675293 seconds +5: Time to load utils op: 0.31176233291625977 secondsTime to load utils op: 0.31162595748901367 seconds +5: +5: Time to load utils op: 0.31157970428466797 seconds +5: Time to load utils op: 0.31160879135131836 secondsTime to load utils op: 0.31175756454467773 secondsTime to load utils op: 0.3117692470550537 seconds +5: +5: +5: Time to load utils op: 0.31165552139282227 seconds +5: Time to load utils op: 0.31172680854797363 seconds +3: Time to load utils op: 0.3113987445831299 secondsTime to load utils op: 0.31139469146728516 seconds +3: +3: Time to load utils op: 0.31139707565307617 secondsTime to load utils op: 0.31140661239624023 seconds +3: +3: Time to load utils op: 0.3114197254180908 seconds +3: Time to load utils op: 
0.3114314079284668 seconds +3: Time to load utils op: 0.3114304542541504 secondsTime to load utils op: 0.3114304542541504 seconds +3: +4: Time to load utils op: 0.31266140937805176 seconds +4: Time to load utils op: 0.31266164779663086 secondsTime to load utils op: 0.3126668930053711 secondsTime to load utils op: 0.31266283988952637 secondsTime to load utils op: 0.3126535415649414 seconds +4: +4: Time to load utils op: 0.31265854835510254 seconds +4: +4: +4: Time to load utils op: 0.31267333030700684 seconds +4: Time to load utils op: 0.31267786026000977 seconds +6: Time to load utils op: 0.3113248348236084 seconds +6: Time to load utils op: 0.31134676933288574 seconds +6: Time to load utils op: 0.31136465072631836 seconds +6: Time to load utils op: 0.3113863468170166 seconds +6: Time to load utils op: 0.31139397621154785 secondsTime to load utils op: 0.3114054203033447 seconds +6: +6: Time to load utils op: 0.31140995025634766 secondsTime to load utils op: 0.3114159107208252 seconds +6: +7: Time to load utils op: 0.3109302520751953 seconds +7: Time to load utils op: 0.310910701751709 secondsTime to load utils op: 0.310638427734375 seconds +7: +7: Time to load utils op: 0.31052613258361816 seconds +7: Time to load utils op: 0.3106822967529297 seconds +7: Time to load utils op: 0.3109605312347412 seconds +7: Time to load utils op: 0.31066346168518066 seconds +7: Time to load utils op: 0.309978723526001 seconds +0: Time to load utils op: 0.0004112720489501953 secondsTime to load utils op: 0.0005223751068115234 seconds +0: +0: Time to load utils op: 0.0004029273986816406 seconds +0: Time to load utils op: 0.0004088878631591797 seconds +1: Time to load utils op: 0.0008101463317871094 seconds +1: Time to load utils op: 0.0010919570922851562 seconds +1: Time to load utils op: 0.001043558120727539 seconds +1: Time to load utils op: 0.0010862350463867188 seconds +1: Time to load utils op: 0.0010900497436523438 seconds +1: Time to load utils op: 0.0010535717010498047 
seconds +1: Time to load utils op: 0.0010480880737304688 seconds +1: Time to load utils op: 0.0011451244354248047 seconds +2: Time to load utils op: 0.0009748935699462891 seconds +2: Time to load utils op: 0.0015075206756591797 secondsTime to load utils op: 0.0015003681182861328 seconds +2: +2: Time to load utils op: 0.0014722347259521484 secondsTime to load utils op: 0.0014653205871582031 seconds +2: +2: Time to load utils op: 0.001461029052734375 seconds +2: Time to load utils op: 0.0014948844909667969 seconds +2: Time to load utils op: 0.0015382766723632812 seconds +5: Time to load utils op: 0.0009765625 seconds +5: Time to load utils op: 0.001069784164428711 seconds +5: Time to load utils op: 0.0011684894561767578 seconds +5: Time to load utils op: 0.0013489723205566406 seconds +5: Time to load utils op: 0.0013344287872314453 seconds +5: Time to load utils op: 0.00131988525390625 seconds +5: Time to load utils op: 0.0012311935424804688 seconds +5: Time to load utils op: 0.0013346672058105469 seconds +0: [2023-02-09 22:45:08,566] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 +0: [2023-02-09 22:45:08,567] [INFO] [utils.py:828:see_memory_usage] MA 0.28 GB Max_MA 0.28 GB CA 0.31 GB Max_CA 0 GB +0: [2023-02-09 22:45:08,567] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.99 GB, percent = 6.2% +3: Time to load utils op: 0.0009844303131103516 seconds +6: Time to load utils op: 0.001004934310913086 seconds +4: Time to load utils op: 0.0012805461883544922 seconds +4: Time to load utils op: 0.0015363693237304688 seconds +3: Time to load utils op: 0.0015659332275390625 seconds +6: Time to load utils op: 0.0016145706176757812 seconds +4: Time to load utils op: 0.0015163421630859375 seconds +6: Time to load utils op: 0.0016446113586425781 seconds +3: Time to load utils op: 0.0015802383422851562 seconds +6: Time to load utils op: 0.001605987548828125 seconds +6: Time to load utils op: 0.0016493797302246094 seconds +3: Time to load 
utils op: 0.0015556812286376953 seconds +4: Time to load utils op: 0.0016329288482666016 secondsTime to load utils op: 0.0015921592712402344 seconds +4: +3: Time to load utils op: 0.0015363693237304688 secondsTime to load utils op: 0.0016052722930908203 seconds +3: +3: Time to load utils op: 0.0016586780548095703 seconds +4: Time to load utils op: 0.0015721321105957031 seconds +6: Time to load utils op: 0.001631021499633789 seconds +3: Time to load utils op: 0.0015664100646972656 seconds +4: Time to load utils op: 0.0015549659729003906 secondsTime to load utils op: 0.0016558170318603516 seconds +4: +6: Time to load utils op: 0.001707315444946289 seconds +6: Time to load utils op: 0.0017361640930175781 seconds +7: Time to load utils op: 0.0004987716674804688 seconds +7: Time to load utils op: 0.00040531158447265625 seconds +7: Time to load utils op: 0.00046753883361816406 seconds +7: Time to load utils op: 0.00048089027404785156 seconds +7: Time to load utils op: 0.0004780292510986328 seconds +7: Time to load utils op: 0.0004353523254394531 seconds +7: Time to load utils op: 0.0004825592041015625 seconds +7: Time to load utils op: 0.0004932880401611328 seconds +0: [2023-02-09 22:45:08,686] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 +0: [2023-02-09 22:45:08,687] [INFO] [utils.py:828:see_memory_usage] MA 0.62 GB Max_MA 0.62 GB CA 0.82 GB Max_CA 1 GB +0: [2023-02-09 22:45:08,687] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.99 GB, percent = 6.2% +0: [2023-02-09 22:45:08,795] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 +0: [2023-02-09 22:45:08,796] [INFO] [utils.py:828:see_memory_usage] MA 0.62 GB Max_MA 0.62 GB CA 0.82 GB Max_CA 1 GB +0: [2023-02-09 22:45:08,796] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.99 GB, percent = 6.2% +0: [2023-02-09 22:45:08,904] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 +0: [2023-02-09 22:45:08,904] [INFO] 
[utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-02-09 22:45:08,905] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.99 GB, percent = 6.2% +0: [2023-02-09 22:45:09,009] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 +0: [2023-02-09 22:45:09,010] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-02-09 22:45:09,010] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.99 GB, percent = 6.2% +0: [2023-02-09 22:45:09,118] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 +0: [2023-02-09 22:45:09,118] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-02-09 22:45:09,119] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.99 GB, percent = 6.2% +0: [2023-02-09 22:45:09,223] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer +0: [2023-02-09 22:45:09,224] [INFO] [utils.py:828:see_memory_usage] MA 0.83 GB Max_MA 0.83 GB CA 1.13 GB Max_CA 1 GB +0: [2023-02-09 22:45:09,224] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.99 GB, percent = 6.2% +0: [2023-02-09 22:45:09,334] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer +0: [2023-02-09 22:45:09,335] [INFO] [utils.py:828:see_memory_usage] MA 0.85 GB Max_MA 0.85 GB CA 1.13 GB Max_CA 1 GB +0: [2023-02-09 22:45:09,335] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.99 GB, percent = 6.2% +0: [2023-02-09 22:45:09,440] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer +0: [2023-02-09 22:45:09,441] [INFO] [utils.py:828:see_memory_usage] MA 0.85 GB Max_MA 0.85 GB CA 1.13 GB Max_CA 1 GB +0: [2023-02-09 22:45:09,441] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.99 GB, percent = 6.2% +0: [2023-02-09 22:45:09,441] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +0: [2023-02-09 
22:45:09,441] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler +0: [2023-02-09 22:45:09,441] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +0: [2023-02-09 22:45:09,441] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +0: [2023-02-09 22:45:09,442] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: +0: [2023-02-09 22:45:09,442] [INFO] [config.py:1011:print] activation_checkpointing_config { +0: "partition_activations": false, +0: "contiguous_memory_optimization": false, +0: "cpu_checkpointing": false, +0: "number_checkpoints": null, +0: "synchronize_checkpoint_boundary": false, +0: "profile": false +0: } +0: [2023-02-09 22:45:09,442] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +0: [2023-02-09 22:45:09,442] [INFO] [config.py:1011:print] amp_enabled .................. False +0: [2023-02-09 22:45:09,442] [INFO] [config.py:1011:print] amp_params ................... False +0: [2023-02-09 22:45:09,442] [INFO] [config.py:1011:print] autotuning_config ............ 
{ +0: "enabled": false, +0: "start_step": null, +0: "end_step": null, +0: "metric_path": null, +0: "arg_mappings": null, +0: "metric": "throughput", +0: "model_info": null, +0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", +0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", +0: "overwrite": true, +0: "fast": true, +0: "start_profile_step": 3, +0: "end_profile_step": 5, +0: "tuner_type": "gridsearch", +0: "tuner_early_stopping": 5, +0: "tuner_num_trials": 50, +0: "model_info_path": null, +0: "mp_size": 1, +0: "max_train_batch_size": null, +0: "min_train_batch_size": 1, +0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, +0: "min_train_micro_batch_size_per_gpu": 1, +0: "num_tuning_micro_batch_sizes": 3 +0: } +0: [2023-02-09 22:45:09,442] [INFO] [config.py:1011:print] bfloat16_enabled ............. True +0: [2023-02-09 22:45:09,442] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False +0: [2023-02-09 22:45:09,442] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] comms_config ................. +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] communication_data_type ...... None +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] compression_config ........... 
{'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa +0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] curriculum_enabled ........... False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] curriculum_params ............ False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] dataloader_drop_last ......... False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] disable_allgather ............ False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] dump_state ................... False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ 
bert.encoder.layer +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] elasticity_enabled ........... False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] flops_profiler_config ........ { +0: "enabled": false, +0: "profile_step": 1, +0: "module_depth": -1, +0: "top_modules": 1, +0: "detailed": true, +0: "output_file": null +0: } +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] fp16_auto_cast ............... None +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] fp16_enabled ................. False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] global_rank .................. 0 +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 1 +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] load_universal_checkpoint .... False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] loss_scale ................... 1.0 +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] memory_breakdown ............. False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] monitor_config ............... 
+0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] nebula_config ................ { +0: "enabled": false, +0: "persistent_storage_path": null, +0: "persistent_time_interval": 100, +0: "num_of_version_in_retention": 2, +0: "enable_nebula_load": true, +0: "load_path": null +0: } +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] optimizer_name ............... None +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] optimizer_params ............. None +0: [2023-02-09 22:45:09,443] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] pld_enabled .................. False +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] pld_params ................... False +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] prescale_gradients ........... False +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] scheduler_name ............... None +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] scheduler_params ............. None +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] sparse_attention ............. None +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] steps_per_print .............. 2000 +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] train_batch_size ............. 256 +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 4 +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] use_node_local_storage ....... False +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] wall_clock_breakdown ......... 
False +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] world_size ................... 64 +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] zero_enabled ................. False +0: [2023-02-09 22:45:09,444] [INFO] [config.py:1011:print] zero_optimization_stage ...... 
0 +0: [2023-02-09 22:45:09,444] [INFO] [config.py:996:print_user_config] json = { +0: "train_micro_batch_size_per_gpu": 4, +0: "train_batch_size": 256, +0: "gradient_clipping": 1.0, +0: "zero_optimization": { +0: "stage": 0 +0: }, +0: "bf16": { +0: "enabled": true +0: }, +0: "steps_per_print": 2.000000e+03, +0: "wall_clock_breakdown": false +0: } +0: Time to load utils op: 0.0004794597625732422 seconds +0: [2023-02-09 22:45:09,445] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=1 micro_batch_size=4 +0: [2023-02-09 22:45:09,455] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=22 [0, 22) STAGE_PARAMS=146525952 (146.526M) TOTAL_PARAMS=146525952 (146.526M) UNIQUE_PARAMS=146525952 (146.526M) +4: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... 
+5: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... 
+4: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... 
+1: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... 
+3: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... 
+3: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... 
+3: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... 
+0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. 
+5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. 
+1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... 
+2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... 
+1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... 
+0: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. 
+0: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +0: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. 
+3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. 
+6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. 
+4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... 
+3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. 
+4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +6: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +5: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +2: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt... +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. 
+4: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. 
+6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +7: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. +3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/mp_rank_00_model_states.pt. 
+6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+2: [2023-02-09 22:45:09,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+2: [2023-02-09 22:45:09,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+5: [2023-02-09 22:45:09,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+5: [2023-02-09 22:45:09,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+1: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:45:09,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+1: [2023-02-09 22:45:09,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:45:09,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+0: [2023-02-09 22:45:09,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+4: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+0: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+7: [2023-02-09 22:45:09,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:45:09,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:45:09,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:45:09,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+5: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+2: [2023-02-09 22:45:09,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+6: [2023-02-09 22:45:09,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... 
+1: [2023-02-09 22:45:09,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:45:09,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:09,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+1: [2023-02-09 22:45:09,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:45:09,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:09,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:09,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:45:09,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... 
+6: [2023-02-09 22:45:09,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:09,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+3: [2023-02-09 22:45:09,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:45:09,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+1: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:45:09,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. 
+4: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:45:09,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:45:09,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... 
+7: [2023-02-09 22:45:09,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:09,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... 
+3: [2023-02-09 22:45:09,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:09,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... 
+4: [2023-02-09 22:45:09,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:09,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:09,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:11,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. 
+5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. 
+5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:45:11,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:45:11,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:45:11,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:45:11,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:45:11,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:11,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:45:11,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:11,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,084] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:45:11,085] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,086] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:11,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:11,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:11,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:45:11,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:45:11,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:45:11,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,088] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. 
+5: [2023-02-09 22:45:11,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:45:11,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:45:11,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:45:11,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:45:11,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:11,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:45:11,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:11,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:11,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:11,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:45:11,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:45:11,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:11,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:11,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:11,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:11,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:11,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:11,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:11,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:45:11,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:11,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:11,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:11,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:11,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:11,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:11,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:11,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:11,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:11,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:11,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:11,258] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:45:11,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:45:11,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:45:11,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,266] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,268] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,269] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:45:11,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,274] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:11,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,336] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:11,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:11,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:45:11,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:45:11,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:45:11,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:45:11,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:11,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:11,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:11,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:11,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:11,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:11,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:11,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:11,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:45:11,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:45:11,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:45:11,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:45:11,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:11,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. 
+5: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:45:11,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:11,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:11,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:11,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:11,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:45:11,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:45:11,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:11,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. 
+5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:45:11,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:11,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:11,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:11,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:45:11,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:11,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:11,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:11,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:45:11,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:11,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,638] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:45:11,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,642] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:11,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:45:11,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:11,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:11,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:45:11,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:45:11,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:11,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:11,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:11,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:11,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:11,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:11,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:11,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:45:11,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:45:11,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:45:11,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:11,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:11,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:11,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:11,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:11,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:11,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:11,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:45:11,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:45:11,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:11,726] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:45:11,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:11,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:11,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:11,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:11,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:45:11,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:45:11,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:45:11,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:11,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:11,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:45:11,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:45:11,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:45:11,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:45:11,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:45:11,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_10-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,791] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:11,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:11,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:45:11,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:45:11,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:11,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:45:11,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:45:11,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:45:11,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:45:11,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:45:11,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_11-model_00-model_states.pt. 
+0: [2023-02-09 22:45:11,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,872] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:11,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:11,874] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,875] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. 
+2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,876] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,877] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. 
+3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,878] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:45:11,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,879] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,880] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. 
+5: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:11,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:11,882] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:45:11,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,883] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. 
+7: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:11,884] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+5: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,885] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:11,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:11,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,886] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. 
+5: [2023-02-09 22:45:11,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:11,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:45:11,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:11,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:11,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:45:11,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:45:11,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,892] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,894] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+1: [2023-02-09 22:45:11,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:11,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:11,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:11,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:11,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:11,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:11,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. 
+1: [2023-02-09 22:45:11,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+0: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:45:11,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:11,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:45:11,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:11,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:11,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:11,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:11,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. 
+6: [2023-02-09 22:45:11,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:11,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:11,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:11,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:11,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:11,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:11,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... 
+2: [2023-02-09 22:45:11,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:11,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:11,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:11,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... 
+6: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. 
+5: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:11,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:45:11,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:11,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:45:11,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:11,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:11,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:45:11,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:11,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:11,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:11,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:11,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:11,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:45:11,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:45:11,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:11,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:11,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:45:11,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. 
+4: [2023-02-09 22:45:11,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:45:11,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:11,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. 
+5: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. 
+5: [2023-02-09 22:45:11,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:11,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:45:11,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:11,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:11,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:11,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:11,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:45:11,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... 
+3: [2023-02-09 22:45:11,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:11,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:11,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:11,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:11,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:11,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:11,986] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:11,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:11,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... 
+4: [2023-02-09 22:45:11,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:11,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:11,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:11,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:11,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:11,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:11,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:11,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:11,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:11,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... 
+7: [2023-02-09 22:45:11,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:11,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:45:11,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:11,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:11,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:11,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:11,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:11,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:12,022] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... 
+0: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+0: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... 
+1: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... 
+6: [2023-02-09 22:45:12,026] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:12,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:12,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:12,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:12,028] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+7: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+7: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:12,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:12,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... 
+5: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+3: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:45:12,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... 
+3: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+1: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+1: [2023-02-09 22:45:12,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+6: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+2: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+7: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:12,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:45:12,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+7: [2023-02-09 22:45:12,037] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:12,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+1: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,038] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+6: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+0: [2023-02-09 22:45:12,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+0: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:45:12,040] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:45:12,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:12,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:12,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. 
+4: [2023-02-09 22:45:12,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:45:12,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:45:12,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+0: [2023-02-09 22:45:12,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:12,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+6: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+1: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. 
+5: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. 
+5: [2023-02-09 22:45:12,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. 
+0: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:12,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. 
+2: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+4: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. 
+3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... 
+7: [2023-02-09 22:45:12,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:45:12,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. 
+2: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. 
+6: [2023-02-09 22:45:12,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:45:12,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:12,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:12,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:12,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:12,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... 
+1: [2023-02-09 22:45:12,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. 
+6: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. 
+1: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... 
+5: [2023-02-09 22:45:12,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:45:12,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:12,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. 
+7: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... 
+4: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:12,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. 
+3: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:45:12,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... 
+3: [2023-02-09 22:45:12,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... 
+0: [2023-02-09 22:45:12,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:45:12,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... 
+0: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... 
+2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. 
+2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. 
+6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... 
+6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,211] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. 
+3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. 
+5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. 
+5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... 
+1: [2023-02-09 22:45:12,213] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. 
+4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:12,214] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... 
+4: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. 
+7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... 
+7: [2023-02-09 22:45:12,215] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:45:12,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:12,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:45:12,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,216] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. 
+6: [2023-02-09 22:45:12,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,217] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:12,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. 
+4: [2023-02-09 22:45:12,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. 
+2: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,219] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... 
+2: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. 
+1: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:12,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... 
+1: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... 
+5: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. 
+7: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:12,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:12,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... 
+6: [2023-02-09 22:45:12,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:45:12,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... 
+7: [2023-02-09 22:45:12,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... 
+7: [2023-02-09 22:45:12,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:12,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... 
+0: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... 
+0: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:12,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+3: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:45:12,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+3: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... 
+4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... 
+5: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:45:12,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+3: [2023-02-09 22:45:12,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:12,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:12,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+6: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+6: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:12,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+1: [2023-02-09 22:45:12,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+7: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+7: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:45:12,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+4: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. 
+1: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:45:12,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:45:12,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+5: [2023-02-09 22:45:12,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:12,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:12,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:12,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:12,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+5: [2023-02-09 22:45:12,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+0: [2023-02-09 22:45:12,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. 
+0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+7: [2023-02-09 22:45:12,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. 
+6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. 
+0: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. 
+2: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: > overriding learning rate value to 0.0002 +0: > overriding minimum learning rate value to 2e-05 +0: > overriding warmup iterations value to 0 +0: > overriding total number of iterations value to 1 +0: > overriding decay style value to cosine +0: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. 
+4: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... 
+1: [2023-02-09 22:45:12,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. 
+3: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:45:12,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:45:12,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. 
+1: [2023-02-09 22:45:12,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... 
+4: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:45:12,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... 
+2: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... 
+5: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... 
+7: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... 
+6: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:45:12,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:12,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:12,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:12,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... 
+3: [2023-02-09 22:45:12,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:12,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:12,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:12,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:45:12,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:45:12,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:45:12,410] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 40 +5: [2023-02-09 22:45:12,412] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 40 +6: [2023-02-09 22:45:12,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:45:12,426] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 48 +2: [2023-02-09 22:45:12,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. 
+6: [2023-02-09 22:45:12,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:45:12,428] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 22 +6: [2023-02-09 22:45:12,428] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 55 +7: [2023-02-09 22:45:12,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:12,428] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 56 +6: [2023-02-09 22:45:12,428] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 48 +2: [2023-02-09 22:45:12,429] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 22 +7: [2023-02-09 22:45:12,429] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 56 +6: [2023-02-09 22:45:12,430] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 55 +0: [2023-02-09 22:45:12,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:12,430] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 6 +1: [2023-02-09 22:45:12,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. 
+3: [2023-02-09 22:45:12,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:12,433] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 6 +1: [2023-02-09 22:45:12,433] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 14 +3: [2023-02-09 22:45:12,433] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 24 +3: [2023-02-09 22:45:12,434] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 24 +1: [2023-02-09 22:45:12,435] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 14 +1: [2023-02-09 22:45:12,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:45:12,435] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 10 +0: [2023-02-09 22:45:12,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:45:12,437] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 10 +0: [2023-02-09 22:45:12,437] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 1 +2: [2023-02-09 22:45:12,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. 
+0: [2023-02-09 22:45:12,438] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 1 +2: [2023-02-09 22:45:12,438] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 19 +2: [2023-02-09 22:45:12,440] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 19 +5: [2023-02-09 22:45:12,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:45:12,443] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 45 +5: [2023-02-09 22:45:12,444] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 45 +4: [2023-02-09 22:45:12,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:12,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:12,446] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 32 +4: [2023-02-09 22:45:12,446] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 38 +6: [2023-02-09 22:45:12,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. 
+6: [2023-02-09 22:45:12,447] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 49 +4: [2023-02-09 22:45:12,447] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 32 +4: [2023-02-09 22:45:12,448] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 38 +6: [2023-02-09 22:45:12,449] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 49 +5: [2023-02-09 22:45:12,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:12,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:45:12,452] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 43 +7: [2023-02-09 22:45:12,452] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 63 +7: [2023-02-09 22:45:12,453] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 63 +5: [2023-02-09 22:45:12,453] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 43 +1: [2023-02-09 22:45:12,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:45:12,456] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 15 +5: [2023-02-09 22:45:12,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. 
+5: [2023-02-09 22:45:12,457] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 47 +1: [2023-02-09 22:45:12,457] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 15 +5: [2023-02-09 22:45:12,459] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 47 +5: [2023-02-09 22:45:12,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:45:12,460] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 46 +2: [2023-02-09 22:45:12,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:45:12,460] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 16 +2: [2023-02-09 22:45:12,461] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 16 +5: [2023-02-09 22:45:12,461] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 46 +5: [2023-02-09 22:45:12,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:45:12,462] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 42 +5: [2023-02-09 22:45:12,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. 
+7: [2023-02-09 22:45:12,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:45:12,463] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 41 +7: [2023-02-09 22:45:12,463] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 61 +6: [2023-02-09 22:45:12,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:45:12,464] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 51 +7: [2023-02-09 22:45:12,464] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 61 +5: [2023-02-09 22:45:12,464] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 42 +5: [2023-02-09 22:45:12,465] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 41 +6: [2023-02-09 22:45:12,466] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 51 +3: [2023-02-09 22:45:12,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:45:12,466] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 31 +5: [2023-02-09 22:45:12,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. 
+5: [2023-02-09 22:45:12,467] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 44 +6: [2023-02-09 22:45:12,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:45:12,468] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 31 +6: [2023-02-09 22:45:12,468] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 52 +5: [2023-02-09 22:45:12,468] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 44 +7: [2023-02-09 22:45:12,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:12,469] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 60 +6: [2023-02-09 22:45:12,469] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 52 +1: [2023-02-09 22:45:12,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:12,470] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 60 +1: [2023-02-09 22:45:12,465] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 8 +1: [2023-02-09 22:45:12,467] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 8 +1: [2023-02-09 22:45:12,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. 
+1: [2023-02-09 22:45:12,468] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 13 +1: [2023-02-09 22:45:12,470] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:45:12,470] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 11 +1: [2023-02-09 22:45:12,470] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 13 +1: [2023-02-09 22:45:12,472] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 11 +7: [2023-02-09 22:45:12,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:12,472] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 59 +2: [2023-02-09 22:45:12,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:12,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:45:12,473] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 20 +0: [2023-02-09 22:45:12,473] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 7 +1: [2023-02-09 22:45:12,473] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. 
+1: [2023-02-09 22:45:12,474] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 9 +7: [2023-02-09 22:45:12,474] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 59 +2: [2023-02-09 22:45:12,475] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 20 +0: [2023-02-09 22:45:12,475] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 7 +1: [2023-02-09 22:45:12,475] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 9 +3: [2023-02-09 22:45:12,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:45:12,476] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 26 +3: [2023-02-09 22:45:12,477] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 26 +7: [2023-02-09 22:45:12,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:45:12,480] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 62 +7: [2023-02-09 22:45:12,481] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 62 +2: [2023-02-09 22:45:12,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. 
+2: [2023-02-09 22:45:12,483] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 23 +2: [2023-02-09 22:45:12,485] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 23 +6: [2023-02-09 22:45:12,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:45:12,487] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 50 +3: [2023-02-09 22:45:12,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:45:12,487] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 25 +6: [2023-02-09 22:45:12,488] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 50 +2: [2023-02-09 22:45:12,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:45:12,489] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 18 +7: [2023-02-09 22:45:12,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. 
+7: [2023-02-09 22:45:12,489] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 57 +3: [2023-02-09 22:45:12,489] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 25 +6: [2023-02-09 22:45:12,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:45:12,489] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 53 +2: [2023-02-09 22:45:12,490] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 18 +7: [2023-02-09 22:45:12,490] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 57 +6: [2023-02-09 22:45:12,491] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 53 +1: [2023-02-09 22:45:12,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:45:12,496] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 12 +3: [2023-02-09 22:45:12,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. 
+3: [2023-02-09 22:45:12,498] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 27 +1: [2023-02-09 22:45:12,498] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 12 +3: [2023-02-09 22:45:12,499] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 27 +0: [2023-02-09 22:45:12,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:12,506] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 5 +0: [2023-02-09 22:45:12,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:12,507] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 4 +0: [2023-02-09 22:45:12,508] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 5 +0: [2023-02-09 22:45:12,509] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 4 +4: [2023-02-09 22:45:12,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:12,511] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 35 +4: [2023-02-09 22:45:12,512] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 35 +7: [2023-02-09 22:45:12,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. 
+7: [2023-02-09 22:45:12,512] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 58 +7: [2023-02-09 22:45:12,514] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 58 +3: [2023-02-09 22:45:12,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:45:12,514] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 30 +0: [2023-02-09 22:45:12,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:12,515] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 3 +0: [2023-02-09 22:45:12,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:45:12,515] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 0 +3: [2023-02-09 22:45:12,515] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 30 +0: [2023-02-09 22:45:12,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. 
+0: [2023-02-09 22:45:12,516] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 2 +0: [2023-02-09 22:45:12,516] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 3 +0: [2023-02-09 22:45:12,517] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 0 +0: could not find arguments in the checkpoint ... +0: checkpoint version 3.0 +0: [2023-02-09 22:45:12,518] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 2 +3: [2023-02-09 22:45:12,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:45:12,520] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 28 +3: [2023-02-09 22:45:12,521] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 28 +4: [2023-02-09 22:45:12,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:12,522] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 37 +3: [2023-02-09 22:45:12,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. 
+3: [2023-02-09 22:45:12,522] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 29 +4: [2023-02-09 22:45:12,523] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 37 +3: [2023-02-09 22:45:12,524] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 29 +6: [2023-02-09 22:45:12,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:45:12,528] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 54 +4: [2023-02-09 22:45:12,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:12,529] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 34 +6: [2023-02-09 22:45:12,530] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 54 +4: [2023-02-09 22:45:12,531] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 34 +2: [2023-02-09 22:45:12,551] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:45:12,551] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 21 +2: [2023-02-09 22:45:12,553] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 21 +4: [2023-02-09 22:45:12,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. 
+4: [2023-02-09 22:45:12,554] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 36 +4: [2023-02-09 22:45:12,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:45:12,555] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 39 +4: [2023-02-09 22:45:12,555] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 36 +4: [2023-02-09 22:45:12,557] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 39 +2: [2023-02-09 22:45:12,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:45:12,559] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 17 +4: [2023-02-09 22:45:12,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_146m14b1b5/global_step21553/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. 
+4: [2023-02-09 22:45:12,560] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 33 +2: [2023-02-09 22:45:12,560] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 17 +4: [2023-02-09 22:45:12,562] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 33 +0: successfully loaded checkpoint from checkpoints_146m14b1b5 at iteration 0 +7: time (ms) | load-checkpoint: 3109.50 +0: estimated model parameters: 0.146525952 +0: estimated model parameters without embeddings: 0.106319616 +0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-02-09 22:45:12 +0: > building train, validation, and test datasets ... +0: > datasets target sizes (minimum size): +0: train: 1 +0: validation: 25600 +0: test: 25600 +0: > building train, validation, and test datasets for GPT ... +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.024987 seconds +0: number of documents: 28730568 +0: > dataset split: +0: train: +0: document indices in [0, 28730568) total of 28730568 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.065 seconds +0: total number of samples: 6713794 +0: total number of epochs: 1 +0: > building dataset index ... +0: reading sizes... +0: reading pointers... 
+0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.021651 seconds +0: number of documents: 364608 +0: > dataset split: +0: validation: +0: document indices in [0, 364608) total of 364608 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.009 seconds +0: total number of samples: 84978 +0: total number of epochs: 1 +0: > finished creating GPT datasets ... +0: [after dataloaders are built] datetime: 2023-02-09 22:45:25 +0: done with setup ... +0: training ... 
+7: time (ms) | model-and-optimizer-setup: 20112.72 | train/valid/test-data-iterators-setup: 11408.53 +0: [after training is done] datetime: 2023-02-09 22:45:25 +7: ----------------------------------------------------------------------------------------------------------------- +7: validation loss at the end of training for val data | lm loss value: 3.334061E+00 | lm loss PPL: 2.805203E+01 | +7: ----------------------------------------------------------------------------------------------------------------- +END 2820872: Thu 09 Feb 2023 10:45:46 PM EET diff --git a/146m14b1b5/sbatch_146m14b1b5.sh b/146m14b1b5/sbatch_146m14b1b5.sh new file mode 100755 index 0000000000000000000000000000000000000000..fededf7a6153920c02bdbe46cfe538c7ae954e8f --- /dev/null +++ b/146m14b1b5/sbatch_146m14b1b5.sh @@ -0,0 +1,162 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=146m14b1b5 + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +# DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train1b5.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_1B5_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + +PP_SIZE=1 
+TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_140M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 11300000000 +# -> Samples: 5517578 +TRAIN_SAMPLES=5_517_578 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 55_176 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + 
"zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/146m14b1b5/sbatch_146m14b1b5val.sh b/146m14b1b5/sbatch_146m14b1b5val.sh new file mode 100644 index 0000000000000000000000000000000000000000..ee8f7d6ed35e005b390b517454221c7cca72bff5 --- /dev/null +++ b/146m14b1b5/sbatch_146m14b1b5val.sh @@ -0,0 +1,167 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=146m14b1b5val +VARIANT_CKPT=146m14b1b5 + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT_CKPT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +# DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" 
+TRAIN_DATA_PATH=train14b.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_14B_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_140M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 11300000000 +# -> Samples: 5517578 +TRAIN_SAMPLES=1 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 0 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + --no-load-optim \ + --reset-progress \ + --override-lr-scheduler \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1 \ + --eval-iters 100 \ + --eval-only true \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + 
--log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/146m14b1b5/tensorboard_146m14b1b5/events.out.tfevents.1675875178.nid006806.93431.0 b/146m14b1b5/tensorboard_146m14b1b5/events.out.tfevents.1675875178.nid006806.93431.0 new file mode 100644 index 0000000000000000000000000000000000000000..cd7d7c6201a74eb9737a04271bd9aa5b837d9156 --- /dev/null +++ b/146m14b1b5/tensorboard_146m14b1b5/events.out.tfevents.1675875178.nid006806.93431.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71b2ebe5eaf8a4c0a189d517a330b89747a8dfa8f6a40c90a89748166b97f38c +size 38441517 diff --git a/146m14b1b5/tensorboard_146m14b1b5val/events.out.tfevents.1675555841.nid006665.110632.0 b/146m14b1b5/tensorboard_146m14b1b5val/events.out.tfevents.1675555841.nid006665.110632.0 new file mode 100644 index 0000000000000000000000000000000000000000..d144a6d8ff8384facb3fee662e14cc4cfa182794 --- /dev/null 
+++ b/146m14b1b5/tensorboard_146m14b1b5val/events.out.tfevents.1675555841.nid006665.110632.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f28c6bdf575eb10550555e3857d30574972bc46605c496d1a75162789ca45d3 +size 980 diff --git a/146m14b1b5/tensorboard_146m14b1b5val/events.out.tfevents.1675975463.nid007045.48760.0 b/146m14b1b5/tensorboard_146m14b1b5val/events.out.tfevents.1675975463.nid007045.48760.0 new file mode 100644 index 0000000000000000000000000000000000000000..9c14eddff4f6f1dd4826a492c9d5462e2716b453 --- /dev/null +++ b/146m14b1b5/tensorboard_146m14b1b5val/events.out.tfevents.1675975463.nid007045.48760.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1b7c14e875c475a3d70152d6bd7ed9b75ad4cc2e306932d315453c41e4569bc +size 980 diff --git a/146m14b1b5/transformers/config.json b/146m14b1b5/transformers/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639868e787b6f7945a258cf690dc9b5dba7be4a6 --- /dev/null +++ b/146m14b1b5/transformers/config.json @@ -0,0 +1 @@ +{"vocab_size": 50304, "n_positions": 2048, "n_embd": 768, "n_layer": 15, "n_head": 12, "n_inner": 3072, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, 
"max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": false, "model_type": "gpt2"} \ No newline at end of file diff --git a/146m14b1b5/transformers/pytorch_model.bin b/146m14b1b5/transformers/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a5c7d72d7492dcc487e8bcb2dc115430f24dedc --- /dev/null +++ b/146m14b1b5/transformers/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a5a24c6477eae03189ca4733a3e36a2d88b43b89521e6d8c64563b3fde251d4 +size 418947557 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7a3a9eda99fa74cbb5ae159faf7553d627dfee3 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:edc74028075d3efb474e09324831f39fe1143a4a160bdfd8bd3429909e8823f6 +size 78980887 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9fad58d05988c53dddf901339e19cb5d7703c029 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aed590c97b5f557568aaaf4a3247d8b68a9f26656252e7da5a02d4faf97dcf6 +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..88096919954b2c5aae0eeba55529aaa9e608a4ba --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf61010c9f866a0d7aea18e086f07a2da93f9f3de285a3b1791f3791d5bbf7d +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b57032655dd44f397cfca60a2194d9b88b976ea --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b976d3be29345881182dec8147d7ad98a872bbd30609a35aefb904fb9d9c0b +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fb6436ef6770785c30afe1c23375893a94cfa63 --- /dev/null +++ 
b/421m3b9100m/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0b8c8919b832082f195b7223e0c1323ba7d3e555729d9944fb506a7369960b9 +size 78980898 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f5ac5995754cc3905c23c23db6c7eadbc9f1ba7 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cefb51b55fbd8e45a684eaf14442798a6bca078450f6754f1fd5e20c3b7afc13 +size 78980898 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..53133db6ab334a82bd17c71cb85e7d7cd88d4fe3 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b99ab3f36b105e6cba6227d12d77f9605dae6259f677be5cfbf8e7e685ebade5 +size 78980898 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..63be8c99ab06bb2b4df3fda656930ba866bd5587 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fea32c919cec926f9605fd826d2aac852f428b7107403f44c1ee93ec58697162 +size 78980834 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..a85b29c0104a766f7bb4f9becfe9711b69af4f73 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14be3efe0e7c2a031185db007c832cd96498dc4cd4a088aac95ac814cf063c3a +size 78981090 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc971d8112c5cf9517ca8b79c1290e01c1aaca30 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f21c112beac0cd9260e55887f9f8e2d579047cb3543c4f787160a7a9cf831c8d +size 78981026 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4717a97691ae589d46586ab37cc7239a581a0e8 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ebe3fef611df28541d8411a5b140fce968803d8bb97fdf90faab0feecfbf34 +size 78981026 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..eefa7c51af4053a7cd829815ff2cd66002196a43 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff784cbecc6b1ff651aa788d53819c1fbffb57d07dc6aa71de3dad3265371e8 +size 78980951 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt 
b/421m3b9100m/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1073876b00a2fe186bdf59425343606654dcb03b --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5db53a69f4f8eeb9dab949b925ac7f1465da184f6120c7228b18700b30f66a8e +size 78980834 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a9fbdc8ac763da35e6ed0e7d59bc09416254e4e --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461835a5dc7119746711ce6d5a9bb4ded8e81cebcbefc4dfadbd010ddbf0a7f1 +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f44126c5fe6770c50d48a99d970d82a847a69789 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2ff62db4cf36bcfa9cc74df0b0966aa227582848191b984fe59ba2a364db32d +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef66a78c3f57d82e44648f420100b6bcee5c5706 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8419556c02051e618b6d144388f228151545d0ed4588b7283d7011b102cb68ba 
+size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..507ebd30bf1cc2aa5f80c0280a84bde77567034f --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:673dd99c48342906bc7d323bd1df4393dcc80e639c95d62a739c65367a2e7254 +size 78981026 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcffc159ad3bd9b1b17bb87123f899a3c990d2f5 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcc8f133776d5eb058ae3a3669899c136d6c3209f86d2b96c6c3c6f4946e0ee2 +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1eef19c03698f352a8180980fa524efa05185517 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a17c8afc4f67315510a05f07b697bdb76c1dfa2cbae3ad4a505c5fbc7c083140 +size 78981026 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc9af8eda82b0000e32769c92c9ee067242a9a3d --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:4516d2d18a9c21469873d560be1b764bc062c7edac4a1b9d66fba92b1573cbe9 +size 78980834 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed1f034a6beeb4c1148974d70a48ffe9fcc70fa8 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dda51b6b2a1a35134a32b57ec89a7a1bcd6b9a65d1a7cc33cdec115afee8706b +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..56b3f2df476b2635daf443a540557ef35765581d --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:317c81b0b9e5fc3b15533fbf4ad4b3af6ec38c22e62c303cc2cbe4f20f1fc6d6 +size 78981026 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..03b03aa442ea9caa114170da0998cab6c4121d92 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57f275baa52a9bd7afc1e69915fb4c5db98861a4320c5b1be5715ada002f9588 +size 78980887 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..49e391e1994e18a9e69e590dc7a922e02cee2ef9 --- /dev/null +++ 
b/421m3b9100m/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2ca8da04f827498e20f48e2ee932896ec13438d0e229891e5f816cf45f1879d +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c18af742b69a6ff4285c55f45e63c150ccde450 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a1af1dc33f8299dc0a9e5a4f6fcd7a43d92d55245ca422827e793f921e35b12 +size 78980898 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d832016a166f8da7d02fe7fe660dbd8775b5a421 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc16702fc73e37b7850e0c59aa91a59db460efdfc51c5db19ee34d380feff3d +size 78980834 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a0cfe903df866131df8a661fa81352d73a1170a --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:472bd806794cf58da2f2b27667923e480a303f0f0f25ede7aa24e43c5ab62c86 +size 78981090 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..82496dc7ec4f1ece0af9b7b3c092137467a8cab3 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01dec6830df9bb976f20410bd155c8fe2cc199720181ad69ac64823792b77a01 +size 78980834 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc8afe8a01657d4932b9d277604c54a33f4a5fd8 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dd7c15256c705124914ce17cdbb5a6c8372b5d212bdccac253a520ac7dfc11d +size 78981026 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d286757e104becb0a507d22e92feeb2e7919923 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f99eac3ebf126e0893081f815765db0fd9a4d5d0da74d3a105825bdd7930b21d +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9bb87f3c1c2123e4086b86fd0192ac5fa488c5d6 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58d5fb23989b8ca1f3e216d3bc302f6f7d32f775c77ee9be93fcf6fe427de9a5 +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt 
b/421m3b9100m/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..14358e929b6c254c124b2e55763b556f0a7810c6 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cf6699e235e9ee503860bae99ffdf42c70c61186e603205c7c9958b86efeb34 +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7e22cbabb594d4112edfb1b4045027a86563633 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b3e71ca19ef772f72bbddcdf514684f72d23cbce34f267b0162efe9cd16db9 +size 78980898 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6a39048edebe7000a06b2d96f5cce31d0e81793 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fa431ca192e09cb862d5947be792cca78f6648ba61220f44e5e935027069f10 +size 78980887 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b497b163f2d7e5d1768597a7559bf0fad633c4f6 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:099dfac7c5c94c2f6b2a3236bf3bd9206a91a50f5acc430a7411213f7836bcec +size 
78981090 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..28873fbf7b73a8d8e1e3520597deedcd8be0a997 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2707b26bc3a94960bc9c2cb7e9901e96ed0446c6bf880f819af4d97d62fcd7ed +size 78980834 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..84834ce21f5723af00603bcf01e57c19f1ded9cf --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7e10c93df57165fcc391e456a960b4266d7d1b3ecc56d72b61cccfa71eafd9 +size 78981026 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e918e94e627c713d9c60e29dc2879994d8c47d64 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3858d7400cd9ecaee47e78d0a6553595943412a0743973f0969e4205b6e2dd33 +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3dbdc4c984427021d52f9d9ee0a13bfbddc2265 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c468e6461269c174fd6fc2546dc67f81061d867fba8fb61dcdbdb5f6c0c4e785 +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e27b76cbfe17df58ae3776f103437e41c2e647a --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4015ba05b3e13e84669467ccbf8785369e79c0d0bf96565a158d3d5eed7f394a +size 78981090 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5a0ec41d7797f1940330161d36da8a4713b2657 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a45ccd4a2627dd07b760cc1952abc09be0b38649f09c9c9e2de83e070ea7798 +size 78980898 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..78039eb9a379fe3392f53907df4816707588aed2 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f72800e8d28a4fb5156bfa0feb71a9af633bdffe772f14b7aca0584370839c2 +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd1a2751e67b1e0ae9ad89978b176fe3330447e2 --- /dev/null +++ 
b/421m3b9100m/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ee06dc8134d1caca8ab5b416a0042ade07f29b5f04691ec4253fabe2db8226b +size 78980770 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..65cfaddf99b800cb91035a119e158b79182668e5 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b61acd78ab7b2d48ac7f45dd388839e7b2e85a0ea8458934c7e30acbe1c0db +size 78981026 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..16124b7b11527fa399fb4dbbc0634527854076ed --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1bc4d347ce811b83b8574ffd8fb52ada1e6722c9cd0ab1503701f5052b2f427 +size 78980951 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ab3d45c5d4d40333d6b27d52dabc4811dbc98cb --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:203888c53a000cb77cea778e11e5926a19954bf0a3a9947558082985c050b36f +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..9f0f9891f7696de3eead5da6f6ce80ef8023ee65 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b2691d2140e86c3190ad86fca68e5537f3ea6677d6820cff3af90a80eabdd9b +size 78981026 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e962551e1801ea3f7696cca52c286cc71544d272 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60663536c920cc4a1bc69a6c3f2bb9ebc068a7307f3ea7266a066dff3a7dae5d +size 78980898 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d5c085a7236eaa07bd04b0b1b70890460b41299 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ef042fc2902b760fa4315fd8e01402833601c40d0a0ec36255262c8fbc0074f +size 78980898 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c3679a9ec32b48e9d80cc1d4298e3c8bf815085 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53bdbfa764738c7535fcf341d02d8719816dba5a3280636d4d617dc6cda730a4 +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt 
b/421m3b9100m/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..860b9e3479e303091c2821c8516acf68465a8666 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9e6b9ef6423c6c8e34d6664865d06354f180a8d9de746be341c4fbfae2f1ea9 +size 78980898 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea3434e3bdd495f5c448d1bc9fbe340963f5d209 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13b4acf28c9cd4c64c816d20bd93742015a96323e051f92a411823f2de7c1d45 +size 78981090 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a694a7a83ed1305ff42123d063a50534279b0d49 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab0b7d12ad7a71d61c4b4a152ca5eab0a2a8112fd4f7e41c2c77b2b7289784c1 +size 78980898 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9244dad2b5d518aeda3365b60952ee9f35824332 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab3fd35693a4f2040df4b5ba5dd01bc1378ad1d278ba741ea2821a390d57a02 
+size 78981026 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e4b4fc40b0e63535dc5a68a720ea00dfb6620e --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f363e7c8a1a49771c140988f8c1c3f04dc663003c6fd19c2c517c4c0b52647d +size 78980898 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3853d9afb5a7dc8bc6dc2d5611683b85fe82a89 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da66c28a1ca3aa6b85b9ad5325fbe4bd67cc8e8528a1ef4b61e1698060cc7385 +size 78980887 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3f24336e099483b909377e3409d1cf3f3cbd767 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca55dd1066907802e441583fd2c635a6976add3149832648281659b135202ba7 +size 78980962 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..08f469bb648926466f34ccf4b9919d711e1a7526 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c839133bb8ad4b0704e15dae68bcac212e326f6f6e0a4ba4f27157d45128d192 +size 78981090 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a22cd510ed570824c8882d48cf0b638f838d52ec --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:854172438499c99c72937129b6ab353b9345eb9c912d24f9a19fd7c0a76ba07a +size 78980834 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..97ec1575826011ec683ea9c106d8bd2c6d75a2c7 --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05848c0ca0b9c79b3698a08504288547eb5e55c138cf352306a8d7fc40d28552 +size 78980898 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..88d6ad02e4e86d829a68677c1eb6a8696fbcbffc --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84650b679f238abbb8c8a830e16f40f9ef1f6167a12ab2c959f9f9663f1fdd2e +size 78980823 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..01e9318b26ec0809de0e111db6df1bca7c78d824 --- /dev/null +++ 
b/421m3b9100m/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e807a6058c6bffc81bae4921bbbd7c54982dea42ff35d39c7bc8bf03cee6897 +size 78980887 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3bd8c796d3d53fd047b912a416f0d190ecbf0bd --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f1eb6e7c8df5a0bbf4b9cee5c300a70a178e3ca266cf51175b81b6d5b4537ad +size 78980951 diff --git a/421m3b9100m/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/421m3b9100m/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ac83c90bd6a1c0fbcf333f839b2f90fe07b3a1e --- /dev/null +++ b/421m3b9100m/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20bd965ca1f3057eab2345f03f1979275f46388fa534f5e4975efd243b9e555f +size 78980887 diff --git a/421m3b9100m/global_step7508/layer_01-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_01-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2efb717ac06b797b8ea6ec19a0e75e904d8276c8 --- /dev/null +++ b/421m3b9100m/global_step7508/layer_01-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec6eb9e89a9b7ad5654e4edb620b2514b0e3f371774eb2fbb88a216bfc9f2bbc +size 134022403 diff --git a/421m3b9100m/global_step7508/layer_03-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_03-model_00-model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..5145633867bc746195585bc7743544b8eb8ab95c --- /dev/null +++ b/421m3b9100m/global_step7508/layer_03-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:914d86e2af8771fa3dbe3751949d0b5873bd845881919d3235317dda6302cc25 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_04-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_04-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c067730144c679bb0609723c2945d324ae305eb3 --- /dev/null +++ b/421m3b9100m/global_step7508/layer_04-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a413ce3e079fb1e1e0dd8e4e2e9fc4967e330e51ec0cb2c873feb57e64c8d40 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_05-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_05-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..65eba073329261d14f51700f7838279b5a802558 --- /dev/null +++ b/421m3b9100m/global_step7508/layer_05-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb8be88946a0fa06522b93305af9c5466c2d8d2977beb50eacf7789611afe3c +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_06-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_06-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f977adabe1922eb2c38c30c455ead15423fee1a4 --- /dev/null +++ b/421m3b9100m/global_step7508/layer_06-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f6576d6dfc16e3f5f87f6eaa14e782729707f81fccfef5f8311803baa0d0ef2 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_07-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_07-model_00-model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..544deff8f548b9c5cacbaeea91f9834073660058 --- /dev/null +++ b/421m3b9100m/global_step7508/layer_07-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4d52f098036b79506f01b0470f7011cc27f82219b0a9996633c23d2186dba14 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_08-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_08-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4e1c33ad8bbe7b5338ab2f2eff81daf69a4c1f4 --- /dev/null +++ b/421m3b9100m/global_step7508/layer_08-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f29ea3a8799579af3321ec0f94988cbf462ecee00b5ae9b9829aab095d8ed9df +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_09-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_09-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..32a9c9911da2261067a959964223959983d5ac9a --- /dev/null +++ b/421m3b9100m/global_step7508/layer_09-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ef9a74fbae48275fd8eb52caf6e603ff5649ecb89e5b094f10920f36b430732 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_10-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_10-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb926b411a1e7c89a016a1cec82fb9e15dba354a --- /dev/null +++ b/421m3b9100m/global_step7508/layer_10-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5edfbc87034534f768f72eb2a270d6e98e105e5ecf3d21ee4f3f1b958f76a5d8 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_11-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_11-model_00-model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..a861219946b123f1b16c6805f01656fad78ea6c5 --- /dev/null +++ b/421m3b9100m/global_step7508/layer_11-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5254db45992a683bff42746bcf0080f8a84f92287b927a2f660918fae5802711 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_12-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_12-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..65f82c8d5926b691b0a3a327310f6eddca3cb62a --- /dev/null +++ b/421m3b9100m/global_step7508/layer_12-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e1e3d4499ec96d5a644c94fee56b74b260eab6ddf147b3b8b6639c03e05b57 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_13-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_13-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d60bdaa1d603fe34c9e85822db21e1780250cf0d --- /dev/null +++ b/421m3b9100m/global_step7508/layer_13-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e21316373f4913e664b1167f76a97f7ccccee5fc5ce0545e42b64e72ed71fef8 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_14-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_14-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8979d74a09f129024b48148108c9a14d544aad7e --- /dev/null +++ b/421m3b9100m/global_step7508/layer_14-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9d84bca85658a92ef46412454a0106fe3e9f616b77175908f69e7688741eb28 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_15-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_15-model_00-model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..8a8d15be444301d899f9c90439ea0d99743339ec --- /dev/null +++ b/421m3b9100m/global_step7508/layer_15-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:120527d15913b266e79dfe12e8e85e92a4b79153f4f930e7cdf5a821392c8225 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_16-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_16-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..edc4d1949ab1b5b6118839fbfc633ffbe4b8ceaa --- /dev/null +++ b/421m3b9100m/global_step7508/layer_16-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc073ad792fa7e1caed01b7aedddb8be1e5996737a5ac635a525df0bce3b284 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_17-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_17-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4939fccd2996e57548b342cf6dde3b115b4cd9ab --- /dev/null +++ b/421m3b9100m/global_step7508/layer_17-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95aa43b3da5cde7ad1eecda1fc8441faaff57fd55fd78cc23cbc70b4148cc405 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_18-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_18-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..93bbf8db74c71131828f794bb71d5e46cc316d48 --- /dev/null +++ b/421m3b9100m/global_step7508/layer_18-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f5767beb432a59b468bae249f189865400c8e9f11c1af969870b7abfa1b6a84 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_19-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_19-model_00-model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..93657a618ab8c556e692135eeccf6e5761d14a58 --- /dev/null +++ b/421m3b9100m/global_step7508/layer_19-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b03e232f6d0273ccae34f7120e949acef906d1a7649be856560cb3fe32d846 +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_20-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_20-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..55b489b82baec81000eb4251759afbeb3f33b1e3 --- /dev/null +++ b/421m3b9100m/global_step7508/layer_20-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8122208614f8ab1b3f7373076901b91cf5fdd7bb6637d87a34a2f6911e6c533c +size 39359235 diff --git a/421m3b9100m/global_step7508/layer_22-model_00-model_states.pt b/421m3b9100m/global_step7508/layer_22-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9f3455051646d95152b44e1dce0f1ec9c34f77a --- /dev/null +++ b/421m3b9100m/global_step7508/layer_22-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5105c002ec49e8f9335be9593540fb88abbec86dbb272ad013511c397cf13f4 +size 6339 diff --git a/421m3b9100m/global_step7508/mp_rank_00_model_states.pt b/421m3b9100m/global_step7508/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f705671f6ccbc2f6c62778a0a26ed7c0c4dcfe7e --- /dev/null +++ b/421m3b9100m/global_step7508/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79c98420ab02aacd4db32e7b2146cc5e20c66ece0bed013154cad2e596547be +size 37747 diff --git a/421m3b9100m/logs/3165626.err b/421m3b9100m/logs/3165626.err new file mode 100644 index 0000000000000000000000000000000000000000..6ed0b0c9e0c5f9ac413a587fa1c27bd154732bbf --- /dev/null +++ b/421m3b9100m/logs/3165626.err @@ -0,0 +1,1125 @@ +4: 
2023-02-28 00:01:58.642197: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-28 00:01:58.642201: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-28 00:01:58.642209: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-28 00:01:58.642211: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-28 00:01:58.642214: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+4: 2023-02-28 00:01:58.642215: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-28 00:01:58.642203: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-28 00:01:58.642203: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-28 00:01:58.642869: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-28 00:01:58.642874: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+3: 2023-02-28 00:01:58.642878: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-28 00:01:58.642866: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-28 00:01:58.642888: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-28 00:01:58.642879: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-28 00:01:58.642870: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+3: 2023-02-28 00:01:58.642881: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-28 00:01:58.659850: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-28 00:01:58.659842: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-28 00:01:58.659853: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-28 00:01:58.659849: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+6: 2023-02-28 00:01:58.659857: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-28 00:01:58.659867: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-28 00:01:58.659869: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-28 00:01:58.659857: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-28 00:01:58.681520: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+1: 2023-02-28 00:01:58.681526: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-28 00:01:58.681525: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-28 00:01:58.681530: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-28 00:01:58.681533: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-28 00:01:58.681525: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+1: 2023-02-28 00:01:58.681532: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-28 00:01:58.681532: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-28 00:01:58.729110: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-28 00:01:58.729109: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-28 00:01:58.729117: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-02-28 00:01:58.729120: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-28 00:01:58.729105: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-28 00:01:58.729118: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-28 00:01:58.729127: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-28 00:01:58.729126: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+0: 2023-02-28 00:01:58.756629: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-28 00:01:58.756634: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-28 00:01:58.756638: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-28 00:01:58.756632: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-28 00:01:58.756643: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+0: 2023-02-28 00:01:58.756637: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-28 00:01:58.756636: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-28 00:01:58.756626: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-28 00:01:58.759807: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-28 00:01:58.759806: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+5: 2023-02-28 00:01:58.759811: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-28 00:01:58.759821: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-28 00:01:58.759815: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-28 00:01:58.759825: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-28 00:01:58.759826: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+5: 2023-02-28 00:01:58.759815: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-28 00:01:58.802491: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-28 00:01:58.802487: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-28 00:01:58.802493: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-28 00:01:58.802500: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+2: 2023-02-28 00:01:58.802504: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-28 00:01:58.802497: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-28 00:01:58.802498: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-28 00:01:58.802489: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+4: 2023-02-28 00:02:00.239278: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:00.239282: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:00.239284: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:00.239285: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:00.239286: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:00.239290: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:00.239284: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:00.239287: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:00.239648: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-28 00:02:00.239651: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-28 00:02:00.239656: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-28 00:02:00.239656: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-28 00:02:00.239661: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+4: 2023-02-28 00:02:00.239660: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-28 00:02:00.239662: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-28 00:02:00.239663: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-28 00:02:00.281030: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:00.281032: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:00.281042: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:00.281038: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:00.281048: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:00.281037: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:00.281036: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:00.281040: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:00.281376: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-28 00:02:00.281375: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-28 00:02:00.281380: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-28 00:02:00.281381: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-28 00:02:00.281384: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+1: 2023-02-28 00:02:00.281385: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-28 00:02:00.281387: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-28 00:02:00.281389: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-28 00:02:00.285534: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:00.285533: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:00.285536: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:00.285540: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:00.285543: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:00.285551: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:00.285545: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:00.285548: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:00.285905: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-28 00:02:00.285904: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-28 00:02:00.285908: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-28 00:02:00.285911: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-28 00:02:00.285911: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+6: 2023-02-28 00:02:00.285913: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-28 00:02:00.285914: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-28 00:02:00.285918: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-28 00:02:00.286632: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:00.286636: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:00.286632: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:00.286636: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:00.286644: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:00.286641: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:00.286641: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:00.286637: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:00.287003: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-28 00:02:00.287007: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-28 00:02:00.287012: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-28 00:02:00.287013: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-28 00:02:00.287014: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+3: 2023-02-28 00:02:00.287014: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-28 00:02:00.287018: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-28 00:02:00.287020: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-28 00:02:00.334390: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:00.334390: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:00.334398: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:00.334404: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:00.334396: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:00.334396: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:00.334396: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:00.334405: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:00.334740: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-28 00:02:00.334739: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-28 00:02:00.334744: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-28 00:02:00.334745: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-28 00:02:00.334746: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-02-28 00:02:00.334748: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-28 00:02:00.334751: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-28 00:02:00.334753: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-28 00:02:00.394409: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:00.394403: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:00.394406: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:00.394412: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:00.394412: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:00.394411: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:00.394412: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:00.394419: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:00.394795: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-28 00:02:00.394800: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-28 00:02:00.394802: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-28 00:02:00.394806: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-28 00:02:00.394809: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+5: 2023-02-28 00:02:00.394814: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-28 00:02:00.394810: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-28 00:02:00.394822: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-28 00:02:00.402731: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:00.402731: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:00.402737: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:00.402746: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:00.402741: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:00.402744: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:00.402743: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:00.402738: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:00.403142: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-28 00:02:00.403147: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-28 00:02:00.403152: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-28 00:02:00.403154: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-28 00:02:00.403155: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+7: 2023-02-28 00:02:00.403156: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-28 00:02:00.403158: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-28 00:02:00.403160: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-28 00:02:00.475282: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:00.475308: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:00.475305: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:00.475301: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:00.475300: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:00.475313: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:00.475313: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:00.475569: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+2: 2023-02-28 00:02:00.475313: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:00.475576: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-28 00:02:00.475576: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-28 00:02:00.475581: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-28 00:02:00.475581: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+2: 2023-02-28 00:02:00.475588: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-28 00:02:00.475591: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-28 00:02:00.475597: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-28 00:02:04.195102: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.195107: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No 
such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.195111: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.195111: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.195114: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.195119: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.195128: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.195129: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:04.196447: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.196369: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-28 00:02:04.196445: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:04.196462: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.196378: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-28 00:02:04.196459: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.196384: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-28 00:02:04.196456: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.196382: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-28 00:02:04.196456: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.196387: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-28 00:02:04.196465: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.196384: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-28 00:02:04.196469: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.196391: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.196393: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.196703: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.196708: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.196715: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.196714: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.196713: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.196713: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.196724: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.196728: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.197029: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.197046: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.197042: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.197043: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.197041: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.197047: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.197049: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.197055: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.197361: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.197362: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.197375: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-28 00:02:04.197375: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+2: 2023-02-28 00:02:04.197372: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.197372: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.197375: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.197373: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.197374: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.197377: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-28 00:02:04.197392: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-28 00:02:04.197393: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-28 00:02:04.197394: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-28 00:02:04.197395: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-28 00:02:04.197396: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-28 00:02:04.197397: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-28 00:02:04.198537: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.198537: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.198541: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.198544: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 2023-02-28 00:02:04.198647: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.198544: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.198550: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-28 00:02:04.198550: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.198555: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-28 00:02:04.198549: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.198647: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-28 00:02:04.198550: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.198559: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-28 00:02:04.198559: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+1: 2023-02-28 00:02:04.198650: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-28 00:02:04.198569: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-28 00:02:04.198569: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.198588: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 2023-02-28 00:02:04.198655: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-28 00:02:04.198600: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.198658: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.198659: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.198663: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-28 00:02:04.198662: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-28 00:02:04.198662: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+1: 2023-02-28 00:02:04.198661: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.198670: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-28 00:02:04.198671: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-28 00:02:04.198674: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-28 00:02:04.198672: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-28 00:02:04.198677: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-28 00:02:04.198686: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: 2023-02-28 00:02:04.198917: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.198920: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.198919: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.198922: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.198920: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.198924: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.198924: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.198932: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-28 00:02:04.198936: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-28 00:02:04.198937: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-28 00:02:04.198941: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-28 00:02:04.198943: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-28 00:02:04.198944: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-28 00:02:04.198944: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-28 00:02:04.198951: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-28 00:02:04.198973: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-28 00:02:04.204949: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.204952: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or 
directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.204958: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.204960: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.204958: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.204963: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.204966: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-28 00:02:04.205458: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.204963: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.205462: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.205472: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.205468: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.205474: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.205470: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.205480: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.205479: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.206466: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.206472: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.206486: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.206481: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.206481: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.206488: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.206490: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.206492: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.206919: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.206920: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.206922: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.206927: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.206925: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.206929: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.206930: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.206936: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-28 00:02:04.206937: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-28 00:02:04.206937: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-28 00:02:04.206945: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-28 00:02:04.206945: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-28 00:02:04.206943: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-28 00:02:04.206957: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-28 00:02:04.206964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-28 00:02:04.206978: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-28 00:02:04.207592: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:04.207594: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 2023-02-28 00:02:04.207664: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:04.207599: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:04.207601: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.207666: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-28 00:02:04.207602: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.207667: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-28 00:02:04.207602: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:04.207605: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:04.207611: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-28 00:02:04.207614: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-28 00:02:04.207611: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+7: 2023-02-28 00:02:04.207668: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.207668: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.207671: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.207673: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.207673: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-28 00:02:04.207678: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-28 00:02:04.207681: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-28 00:02:04.207685: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-28 00:02:04.207687: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-28 00:02:04.207686: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-28 00:02:04.207686: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-28 00:02:04.207689: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-28 00:02:04.207689: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+4: 2023-02-28 00:02:04.208274: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.208276: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.208278: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.208280: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.208289: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-28 00:02:04.208289: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+4: 2023-02-28 00:02:04.208283: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.208282: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.208283: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.208292: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+4: 2023-02-28 00:02:04.208291: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-28 00:02:04.208300: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-28 00:02:04.208302: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-28 00:02:04.208304: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-28 00:02:04.208305: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-28 00:02:04.208307: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-28 00:02:04.207610: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-28 00:02:04.207620: W 
tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-28 00:02:04.207621: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-28 00:02:04.207620: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-28 00:02:04.207623: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-28 00:02:04.207625: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_upper_triang_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_upper_triang_masked_softmax_cuda... +0: Successfully preprocessed all matching files. 
+0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module fused_mix_prec_layer_norm_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module fused_mix_prec_layer_norm_cuda... +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +7: Successfully preprocessed all matching files. +7: Successfully preprocessed all matching files. +7: Successfully preprocessed all matching files. +4: Successfully preprocessed all matching files. 
+4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +4: Building extension module utils... +4: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +2: +2: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +1: +0: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +0: Building extension module utils... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +4: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +4: Loading extension module utils... +0: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +4: Loading extension module utils... +7: Loading extension module utils... +2: Loading extension module utils... +7: Loading extension module utils... +2: Loading extension module utils... +7: Loading extension module utils... +2: Loading extension module utils... +7: Loading extension module utils... +2: Loading extension module utils... +7: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... +2: Loading extension module utils... 
+2: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +3: Loading extension module utils... +5: Loading extension module utils... +3: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +3: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +3: Loading extension module utils... +5: Loading extension module utils... +3: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +4: +4: +4: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils...Loading extension module utils... +4: +4: +4: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... 
+0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +0: +0: Loading extension module utils...Loading extension module utils... +0: +0: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +7: +7: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step... 
+3: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +3: +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... 
+5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +2: +2: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +2: +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... 
+6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +1: +1: Loading extension module utils... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... 
+1: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +1: +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings +0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/421m3b9100m/logs/3165626.out b/421m3b9100m/logs/3165626.out new file mode 100644 index 0000000000000000000000000000000000000000..e84bf7bb732653be590eb99c1f93bb06f1812fe4 --- /dev/null +++ b/421m3b9100m/logs/3165626.out @@ -0,0 +1,6436 @@ +Model parameters: d_model 1280 ffw_size 5120 kv_size 128 n_heads 10 n_layers 18 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 1 --pipeline-model-parallel-size 1 --num-layers 18 --hidden-size 1280 --num-attention-heads 10 --kv-channels 128 --ffn-hidden-size 5120 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 4 --global-batch-size 256 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --clip-grad 1.0 --kill-switch-path kill-switch-421m3b9100mval --bf16 --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 
--lr-warmup-samples 0 --clip-grad 1.0 --weight-decay 1e-1 --no-load-optim --reset-progress --override-lr-scheduler --log-interval 10 --save-interval 1000 --eval-interval 1 --eval-iters 100 --tensorboard-dir tensorboard_421m3b9100mval --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save checkpoints_421m3b9100m --load checkpoints_421m3b9100m --train-weighted-split-paths-path train100m.txt --valid-weighted-split-paths-path val.txt --data-impl mmap --deepspeed --deepspeed_config ds_configs/3165626.json --zero-stage 0 +START 3165626: Tue 28 Feb 2023 12:01:41 AM EET +0: +0: +0: ======================= ROCm System Management Interface ======================= +0: ================================= Concise Info ================================= +0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +0: 0 40.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 1 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 2 40.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 4 48.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 5 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 6 37.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: ================================================================================ +0: ============================= End of ROCm SMI Log ============================== +4: +4: +4: ======================= ROCm System Management Interface ======================= +4: ================================= Concise Info ================================= +4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +4: 0 46.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 2 44.0c 81.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 4 40.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 5 46.0c N/A 800Mhz 1600Mhz 0% 
auto 0.0W 0% 0% +4: 6 45.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: ================================================================================ +4: ============================= End of ROCm SMI Log ============================== +6: +6: +6: ======================= ROCm System Management Interface ======================= +6: ================================= Concise Info ================================= +6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +6: 0 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 2 41.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 3 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 4 37.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 5 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 6 40.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: ================================================================================ +6: ============================= End of ROCm SMI Log ============================== +5: +5: +5: ======================= ROCm System Management Interface ======================= +5: ================================= Concise Info ================================= +5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +5: 0 47.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 1 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 2 46.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 4 42.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 6 47.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 7 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: ================================================================================ +5: ============================= End of ROCm SMI Log ============================== +2: +2: +2: ======================= ROCm System Management 
Interface ======================= +2: ================================= Concise Info ================================= +2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +2: 0 44.0c 99.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 1 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 2 37.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 3 38.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 4 47.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 6 39.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: ================================================================================ +2: ============================= End of ROCm SMI Log ============================== +1: +1: +1: ======================= ROCm System Management Interface ======================= +1: ================================= Concise Info ================================= +1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +1: 0 47.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 2 42.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 4 51.0c 81.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 6 42.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: ================================================================================ +1: ============================= End of ROCm SMI Log ============================== +7: +7: +7: ======================= ROCm System Management Interface ======================= +7: ================================= Concise Info ================================= +7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +7: 0 41.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 2 41.0c 80.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 3 41.0c N/A 800Mhz 
1600Mhz 0% auto 0.0W 0% 0% +7: 4 38.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 6 41.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: ================================================================================ +7: ============================= End of ROCm SMI Log ============================== +3: +3: +3: ======================= ROCm System Management Interface ======================= +3: ================================= Concise Info ================================= +3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +3: 0 45.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 2 42.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 4 42.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 6 34.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 7 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: ================================================================================ +3: ============================= End of ROCm SMI Log ============================== +2: Launching on nid006550 (2/8), master nid005424 port 9999, GPUs 8, CUDA: True +7: Launching on nid006555 (7/8), master nid005424 port 9999, GPUs 8, CUDA: True +1: Launching on nid005425 (1/8), master nid005424 port 9999, GPUs 8, CUDA: True +6: Launching on nid006554 (6/8), master nid005424 port 9999, GPUs 8, CUDA: True +0: Launching on nid005424 (0/8), master nid005424 port 9999, GPUs 8, CUDA: True +5: Launching on nid006553 (5/8), master nid005424 port 9999, GPUs 8, CUDA: True +4: Launching on nid006552 (4/8), master nid005424 port 9999, GPUs 8, CUDA: True +3: Launching on nid006551 (3/8), master nid005424 port 9999, GPUs 8, CUDA: True +7: > setting tensorboard ... 
+0: using world size: 64, data-parallel-size: 64, tensor-model-parallel size: 1, pipeline-model-parallel size: 1 +0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. +0: using torch.bfloat16 for parameters ... +0: ------------------------ arguments ------------------------ +0: abort_on_unmet_fused_kernel_constraints ......... False +0: accumulate_allreduce_grads_in_fp32 .............. True +0: adam_beta1 ...................................... 0.9 +0: adam_beta2 ...................................... 0.999 +0: adam_eps ........................................ 1e-08 +0: adlr_autoresume ................................. False +0: adlr_autoresume_interval ........................ 1000 +0: apply_query_key_layer_scaling ................... True +0: apply_residual_connection_post_layernorm ........ False +0: attention_dropout ............................... 0.1 +0: attention_softmax_in_fp32 ....................... False +0: bert_binary_head ................................ True +0: bert_load ....................................... None +0: bf16 ............................................ True +0: bias_dropout_fusion ............................. True +0: bias_gelu_fusion ................................ True +0: biencoder_projection_dim ........................ 0 +0: biencoder_shared_query_context_model ............ False +0: block_data_path ................................. None +0: checkpoint_activations .......................... False +0: checkpoint_in_cpu ............................... False +0: checkpoint_num_layers ........................... 1 +0: clip_grad ....................................... 1.0 +0: codecarbon_dir .................................. None +0: consumed_train_samples .......................... 0 +0: consumed_train_tokens ........................... 0 +0: consumed_valid_samples .......................... 0 +0: contigious_checkpointing ........................ False +0: cpu_optimizer ................................... 
False +0: cpu_torch_adam .................................. False +0: curriculum_learning ............................. False +0: data_impl ....................................... mmap +0: data_parallel_size .............................. 64 +0: data_path ....................................... None +0: dataloader_type ................................. single +0: DDP_impl ........................................ local +0: decoder_seq_length .............................. None +0: deepscale ....................................... False +0: deepscale_config ................................ None +0: deepspeed ....................................... True +0: deepspeed_activation_checkpointing .............. False +0: deepspeed_config ................................ ds_configs/3165626.json +0: deepspeed_mpi ................................... False +0: distribute_checkpointed_activations ............. False +0: distributed_backend ............................. nccl +0: embed_layernorm ................................. False +0: embedding_path .................................. None +0: encoder_seq_length .............................. 2048 +0: eod_mask_loss ................................... False +0: eval_interval ................................... 1 +0: eval_iters ...................................... 100 +0: eval_only ....................................... None +0: evidence_data_path .............................. None +0: exit_duration_in_mins ........................... None +0: exit_interval ................................... None +0: ffn_hidden_size ................................. 5120 +0: finetune ........................................ False +0: fp16 ............................................ False +0: fp16_lm_cross_entropy ........................... False +0: fp32_residual_connection ........................ False +0: gigaflos_no_embeds .............................. 0 +0: global_batch_size ............................... 
256 +0: glu_activation .................................. None +0: hidden_dropout .................................. 0.1 +0: hidden_size ..................................... 1280 +0: hysteresis ...................................... 2 +0: ict_head_size ................................... None +0: ict_load ........................................ None +0: img_dim ......................................... 224 +0: indexer_batch_size .............................. 128 +0: indexer_log_interval ............................ 1000 +0: inference ....................................... False +0: init_method_std ................................. 0.02 +0: init_method_xavier_uniform ...................... False +0: initial_loss_scale .............................. 4294967296 +0: kill_switch_path ................................ kill-switch-421m3b9100mval +0: kv_channels ..................................... 128 +0: layer_norm_fusion ............................... True +0: layernorm_epsilon ............................... 1e-05 +0: lazy_mpu_init ................................... None +0: load ............................................ checkpoints_421m3b9100m +0: local_rank ...................................... None +0: log_batch_size_to_tensorboard ................... True +0: log_interval .................................... 10 +0: log_learning_rate_to_tensorboard ................ True +0: log_level ....................................... None +0: log_level_replica ............................... None +0: log_loss_scale_to_tensorboard ................... True +0: log_num_zeros_in_grad ........................... False +0: log_params_norm ................................. False +0: log_path ........................................ None +0: log_timers_to_tensorboard ....................... True +0: log_validation_ppl_to_tensorboard ............... True +0: loss_on_targets_only ............................ False +0: loss_scale ...................................... 
None +0: loss_scale_window ............................... 1000 +0: lr .............................................. 0.0002 +0: lr_decay_iters .................................. None +0: lr_decay_samples ................................ 1 +0: lr_decay_style .................................. cosine +0: lr_decay_tokens ................................. None +0: lr_warmup_fraction .............................. None +0: lr_warmup_iters ................................. 0 +0: lr_warmup_samples ............................... 0 +0: make_vocab_size_divisible_by .................... 128 +0: mask_prob ....................................... 0.15 +0: masked_softmax_fusion ........................... True +0: max_position_embeddings ......................... 2048 +0: mean_noise_span_length .......................... None +0: memory_centric_tiled_linear ..................... False +0: merge_file ...................................... gpt2/merges.txt +0: micro_batch_size ................................ 4 +0: min_loss_scale .................................. 1.0 +0: min_lr .......................................... 2e-05 +0: mmap_warmup ..................................... False +0: no_load_optim ................................... True +0: no_load_rng ..................................... None +0: no_save_optim ................................... None +0: no_save_rng ..................................... None +0: noise_density ................................... None +0: num_attention_heads ............................. 10 +0: num_channels .................................... 3 +0: num_classes ..................................... 1000 +0: num_layers ...................................... 18 +0: num_layers_per_virtual_pipeline_stage ........... None +0: num_workers ..................................... 2 +0: onnx_safe ....................................... None +0: openai_gelu ..................................... 
False +0: optimizer ....................................... adam +0: optimizer_fusion ................................ True +0: override_lr_scheduler ........................... True +0: pad_vocab_size_to ............................... None +0: params_dtype .................................... torch.bfloat16 +0: partition_activations ........................... False +0: patch_dim ....................................... 16 +0: pipeline_model_parallel_size .................... 1 +0: position_embedding_type ......................... PositionEmbeddingType.absolute +0: pp_partition_method ............................. None +0: profile_backward ................................ False +0: query_in_block_prob ............................. 0.1 +0: rampup_batch_size ............................... None +0: rank ............................................ 0 +0: remote_device ................................... none +0: reset_attention_mask ............................ False +0: reset_position_ids .............................. False +0: reset_progress .................................. True +0: retriever_report_topk_accuracies ................ [] +0: retriever_score_scaling ......................... False +0: retriever_seq_length ............................ 256 +0: reweight_loss_based_on_position_frequency ....... False +0: sample_rate ..................................... 1.0 +0: save ............................................ checkpoints_421m3b9100m +0: save_interval ................................... 1000 +0: scatter_gather_tensors_in_pipeline .............. True +0: scattered_embeddings ............................ False +0: seed ............................................ 1234 +0: seq_length ...................................... 2048 +0: sgd_momentum .................................... 0.9 +0: short_seq_prob .................................. 0.1 +0: skip_train_iteration_range ...................... None +0: split ........................................... 
None +0: split_transformers .............................. False +0: sync_tp_duplicated_parameters ................... False +0: synchronize_each_layer .......................... False +0: tensor_model_parallel_size ...................... 1 +0: tensorboard_dir ................................. tensorboard_421m3b9100mval +0: tensorboard_log_interval ........................ 1 +0: tensorboard_queue_size .......................... 5 +0: test_weighted_split_paths ....................... None +0: test_weighted_split_paths_path .................. None +0: tile_factor ..................................... 1 +0: titles_data_path ................................ None +0: tokenizer_name_or_path .......................... None +0: tokenizer_type .................................. GPT2BPETokenizer +0: train_iters ..................................... None +0: train_samples ................................... 1 +0: train_tokens .................................... None +0: train_weighted_split_names ...................... ['train'] +0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_100M_text_document']] +0: train_weighted_split_paths_path ................. None +0: train_weighted_split_splits ..................... [['0:1']] +0: train_weighted_split_weights .................... [['1.0']] +0: universal_checkpoint ............................ False +0: use_bnb_optimizer ............................... False +0: use_checkpoint_lr_scheduler ..................... False +0: use_contiguous_buffers_in_ddp ................... True +0: use_cpu_initialization .......................... None +0: use_one_sent_docs ............................... False +0: use_pin_memory .................................. False +0: valid_num_workers ............................... 2 +0: valid_weighted_split_names ...................... ['validation'] +0: valid_weighted_split_paths ...................... 
[['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] +0: valid_weighted_split_paths_path ................. None +0: valid_weighted_split_splits ..................... [['0:1']] +0: valid_weighted_split_weights .................... [['1.0']] +0: virtual_pipeline_model_parallel_size ............ None +0: vocab_extra_ids ................................. 0 +0: vocab_file ...................................... gpt2/vocab.json +0: weight_decay .................................... 0.1 +0: world_size ...................................... 64 +0: zero_allgather_bucket_size ...................... 0.0 +0: zero_contigious_gradients ....................... False +0: zero_reduce_bucket_size ......................... 0.0 +0: zero_reduce_scatter ............................. False +0: zero_stage ...................................... 0 +0: -------------------- end of arguments --------------------- +0: setting number of micro-batches to constant 1 +0: > building GPT2BPETokenizer tokenizer ... +0: > padded vocab (size: 50257) with 47 dummy tokens (new size: 50304) +0: DeepSpeed general environment info: +0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] +0: torch version .................... 1.13.0+rocm5.2 +0: torch cuda version ............... None +0: torch hip version ................ 5.2.21151-afdc89f8 +0: nvcc version ..................... None +0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] +0: deepspeed info ................... 0.7.5, unknown, unknown +0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 +0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** +0: > initializing torch distributed ... 
+0: [2023-02-28 00:02:19,545] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +0: > initializing tensor model parallel with size 1 +0: > initializing pipeline model parallel with size 1 +0: > setting random seeds to 1234 ... +0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 +0: > compiling dataset index builder ... +0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: make: Nothing to be done for 'default'. +0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: >>> done with dataset index builder. Compilation time: 0.093 seconds +0: > compiling and loading fused kernels ... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 87 +0: ninja: no work to do. 
+0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 63 +0: ninja: no work to do. 
+0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 67 +0: [1/1] c++ layer_norm_hip_kernel.cuda.o layer_norm_cuda.o -shared 
-L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o fused_mix_prec_layer_norm_cuda.so +0: >>> done with compiling and loading fused kernels. Compilation time: 18.627 seconds +0: time to initialize megatron (seconds): 0.967 +0: [after megatron is initialized] datetime: 2023-02-28 00:02:40 +0: building GPT model ... +0: [2023-02-28 00:02:41,106] [INFO] [utils.py:827:see_memory_usage] Before Building Model +0: [2023-02-28 00:02:41,107] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB +0: [2023-02-28 00:02:41,107] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.09 GB, percent = 6.0% +0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None +0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=1, model=0): 1, ProcessCoord(pipe=0, data=2, model=0): 2, ProcessCoord(pipe=0, data=3, model=0): 3, ProcessCoord(pipe=0, data=4, model=0): 4, ProcessCoord(pipe=0, data=5, model=0): 5, ProcessCoord(pipe=0, data=6, model=0): 6, ProcessCoord(pipe=0, data=7, model=0): 7, ProcessCoord(pipe=0, data=8, model=0): 8, ProcessCoord(pipe=0, data=9, model=0): 9, ProcessCoord(pipe=0, data=10, model=0): 10, ProcessCoord(pipe=0, data=11, model=0): 11, ProcessCoord(pipe=0, data=12, model=0): 12, ProcessCoord(pipe=0, data=13, model=0): 13, ProcessCoord(pipe=0, data=14, model=0): 14, ProcessCoord(pipe=0, data=15, model=0): 15, ProcessCoord(pipe=0, data=16, model=0): 16, ProcessCoord(pipe=0, data=17, model=0): 17, ProcessCoord(pipe=0, data=18, model=0): 18, ProcessCoord(pipe=0, data=19, model=0): 19, ProcessCoord(pipe=0, data=20, model=0): 20, ProcessCoord(pipe=0, data=21, model=0): 21, ProcessCoord(pipe=0, data=22, model=0): 22, ProcessCoord(pi +0: pe=0, data=23, model=0): 23, ProcessCoord(pipe=0, 
data=24, model=0): 24, ProcessCoord(pipe=0, data=25, model=0): 25, ProcessCoord(pipe=0, data=26, model=0): 26, ProcessCoord(pipe=0, data=27, model=0): 27, ProcessCoord(pipe=0, data=28, model=0): 28, ProcessCoord(pipe=0, data=29, model=0): 29, ProcessCoord(pipe=0, data=30, model=0): 30, ProcessCoord(pipe=0, data=31, model=0): 31, ProcessCoord(pipe=0, data=32, model=0): 32, ProcessCoord(pipe=0, data=33, model=0): 33, ProcessCoord(pipe=0, data=34, model=0): 34, ProcessCoord(pipe=0, data=35, model=0): 35, ProcessCoord(pipe=0, data=36, model=0): 36, ProcessCoord(pipe=0, data=37, model=0): 37, ProcessCoord(pipe=0, data=38, model=0): 38, ProcessCoord(pipe=0, data=39, model=0): 39, ProcessCoord(pipe=0, data=40, model=0): 40, ProcessCoord(pipe=0, data=41, model=0): 41, ProcessCoord(pipe=0, data=42, model=0): 42, ProcessCoord(pipe=0, data=43, model=0): 43, ProcessCoord(pipe=0, data=44, model=0): 44, ProcessCoord(pipe=0, data=45, model=0): 45, ProcessCoord(pipe=0, data=4 +0: 6, model=0): 46, ProcessCoord(pipe=0, data=47, model=0): 47, ProcessCoord(pipe=0, data=48, model=0): 48, ProcessCoord(pipe=0, data=49, model=0): 49, ProcessCoord(pipe=0, data=50, model=0): 50, ProcessCoord(pipe=0, data=51, model=0): 51, ProcessCoord(pipe=0, data=52, model=0): 52, ProcessCoord(pipe=0, data=53, model=0): 53, ProcessCoord(pipe=0, data=54, model=0): 54, ProcessCoord(pipe=0, data=55, model=0): 55, ProcessCoord(pipe=0, data=56, model=0): 56, ProcessCoord(pipe=0, data=57, model=0): 57, ProcessCoord(pipe=0, data=58, model=0): 58, ProcessCoord(pipe=0, data=59, model=0): 59, ProcessCoord(pipe=0, data=60, model=0): 60, ProcessCoord(pipe=0, data=61, model=0): 61, ProcessCoord(pipe=0, data=62, model=0): 62, ProcessCoord(pipe=0, data=63, model=0): 63} +0: [2023-02-28 00:02:43,099] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer +0: stage=0 layers=25 +0: 0: _to_float16 +0: 1: EmbeddingPipe +0: 2: +0: 3: ParallelTransformerLayerPipe +0: 4: 
ParallelTransformerLayerPipe +0: 5: ParallelTransformerLayerPipe +0: 6: ParallelTransformerLayerPipe +0: 7: ParallelTransformerLayerPipe +0: 8: ParallelTransformerLayerPipe +0: 9: ParallelTransformerLayerPipe +0: 10: ParallelTransformerLayerPipe +0: 11: ParallelTransformerLayerPipe +0: 12: ParallelTransformerLayerPipe +0: 13: ParallelTransformerLayerPipe +0: 14: ParallelTransformerLayerPipe +0: 15: ParallelTransformerLayerPipe +0: 16: ParallelTransformerLayerPipe +0: 17: ParallelTransformerLayerPipe +0: 18: ParallelTransformerLayerPipe +0: 19: ParallelTransformerLayerPipe +0: 20: ParallelTransformerLayerPipe +0: 21: undo +0: 22: MixedFusedLayerNorm +0: 23: EmbeddingPipe +0: 24: float16_to_fp32 +0: loss: CrossEntropy +0: [2023-02-28 00:02:43,303] [INFO] [utils.py:827:see_memory_usage] After Building Model +0: [2023-02-28 00:02:43,304] [INFO] [utils.py:828:see_memory_usage] MA 0.79 GB Max_MA 0.79 GB CA 0.86 GB Max_CA 1 GB +0: [2023-02-28 00:02:43,304] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.11 GB, percent = 6.0% +0: setting training iterations to 0 +0: > learning rate decay style: cosine +0: DeepSpeed is enabled. 
+0: [2023-02-28 00:02:43,306] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +0: [2023-02-28 00:02:55,923] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +0: [2023-02-28 00:02:55,924] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +0: [2023-02-28 00:02:55,924] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +0: [2023-02-28 00:02:55,938] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +0: [2023-02-28 00:02:55,938] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +0: [2023-02-28 00:02:56,059] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer +0: [2023-02-28 00:02:56,060] [INFO] [utils.py:828:see_memory_usage] MA 0.78 GB Max_MA 0.79 GB CA 0.88 GB Max_CA 1 GB +0: [2023-02-28 00:02:56,060] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.81 GB, percent = 6.1% +4: ninja: no work to do. +4: Time to load utils op: 0.25557899475097656 seconds +0: ninja: no work to do. 
+0: Time to load utils op: 0.1984868049621582 seconds +0: Time to load utils op: 0.20302510261535645 seconds +0: Time to load utils op: 0.20375323295593262 seconds +0: Time to load utils op: 0.20306158065795898 seconds +4: Time to load utils op: 0.20306062698364258 seconds +4: Time to load utils op: 0.202728271484375 seconds +4: Time to load utils op: 0.20197391510009766 seconds +4: Time to load utils op: 0.20186781883239746 seconds +4: Time to load utils op: 0.20215606689453125 seconds +4: Time to load utils op: 0.20191168785095215 seconds +0: Time to load utils op: 0.20270085334777832 seconds +0: Time to load utils op: 0.20293331146240234 seconds +0: Time to load utils op: 0.20294737815856934 seconds +4: Time to load utils op: 0.20235395431518555 seconds +2: Time to load utils op: 0.21062517166137695 seconds +2: Time to load utils op: 0.2106330394744873 seconds +2: Time to load utils op: 0.21064281463623047 seconds +2: Time to load utils op: 0.21067547798156738 seconds +2: Time to load utils op: 0.2106635570526123 seconds +2: Time to load utils op: 0.2106926441192627 secondsTime to load utils op: 0.21069622039794922 seconds +2: +2: Time to load utils op: 0.21069097518920898 seconds +7: Time to load utils op: 0.20995426177978516 secondsTime to load utils op: 0.20972466468811035 secondsTime to load utils op: 0.21026158332824707 seconds +7: +7: +7: Time to load utils op: 0.21013212203979492 secondsTime to load utils op: 0.20948457717895508 secondsTime to load utils op: 0.21009349822998047 seconds +7: Time to load utils op: 0.20989298820495605 seconds +7: +7: +3: Time to load utils op: 0.21252775192260742 seconds +3: Time to load utils op: 0.21254611015319824 seconds +3: Time to load utils op: 0.21254920959472656 seconds +3: Time to load utils op: 0.21252942085266113 seconds +3: Time to load utils op: 0.21255803108215332 secondsTime to load utils op: 0.21254205703735352 seconds +3: +3: Time to load utils op: 0.212554931640625 seconds +3: Time to load utils op: 
0.21259546279907227 seconds +5: Time to load utils op: 0.21136903762817383 secondsTime to load utils op: 0.2113792896270752 seconds +5: +5: Time to load utils op: 0.21140432357788086 seconds +5: Time to load utils op: 0.21139287948608398 seconds +5: Time to load utils op: 0.21143507957458496 seconds +5: Time to load utils op: 0.2114095687866211 secondsTime to load utils op: 0.21141290664672852 seconds +5: +5: Time to load utils op: 0.21144628524780273 seconds +6: Time to load utils op: 0.2112717628479004 seconds +6: Time to load utils op: 0.21128296852111816 seconds +6: Time to load utils op: 0.21135807037353516 secondsTime to load utils op: 0.21132779121398926 seconds +6: +6: Time to load utils op: 0.21135640144348145 seconds +6: Time to load utils op: 0.2113485336303711 secondsTime to load utils op: 0.21134495735168457 seconds +6: Time to load utils op: 0.2113802433013916 seconds +6: +1: Time to load utils op: 0.21179628372192383 secondsTime to load utils op: 0.21178841590881348 seconds +1: +1: Time to load utils op: 0.21180105209350586 seconds +1: Time to load utils op: 0.21181631088256836 seconds +1: Time to load utils op: 0.21182608604431152 seconds +1: Time to load utils op: 0.21184182167053223 secondsTime to load utils op: 0.21184110641479492 secondsTime to load utils op: 0.2118546962738037 seconds +1: +1: +7: Time to load utils op: 0.5042412281036377 seconds +4: Time to load utils op: 0.0004703998565673828 seconds +4: Time to load utils op: 0.0005066394805908203 seconds +4: Time to load utils op: 0.0004622936248779297 seconds +4: Time to load utils op: 0.0004062652587890625 seconds +4: Time to load utils op: 0.0004563331604003906 seconds +4: Time to load utils op: 0.0004363059997558594 seconds +4: Time to load utils op: 0.0004379749298095703 seconds +4: Time to load utils op: 0.00048542022705078125 seconds +0: Time to load utils op: 0.00057220458984375 seconds +0: Time to load utils op: 0.0005152225494384766 seconds +0: Time to load utils op: 
0.00047659873962402344 seconds +0: Time to load utils op: 0.00042319297790527344 seconds +0: Time to load utils op: 0.0004191398620605469 seconds +0: Time to load utils op: 0.0004475116729736328 seconds +0: Time to load utils op: 0.00044536590576171875 seconds +0: Time to load utils op: 0.4059150218963623 seconds +7: Time to load utils op: 0.0004734992980957031 seconds +7: Time to load utils op: 0.00044536590576171875 seconds +7: Time to load utils op: 0.0003712177276611328 seconds +7: Time to load utils op: 0.0004935264587402344 seconds +7: Time to load utils op: 0.00044274330139160156 seconds +7: Time to load utils op: 0.0004582405090332031 seconds +7: Time to load utils op: 0.0003426074981689453 seconds +3: Time to load utils op: 0.0009763240814208984 seconds +3: Time to load utils op: 0.0010194778442382812 seconds +3: Time to load utils op: 0.0011172294616699219 seconds +7: Time to load utils op: 0.00037860870361328125 seconds +3: Time to load utils op: 0.0013463497161865234 seconds +3: Time to load utils op: 0.0013322830200195312 secondsTime to load utils op: 0.0013773441314697266 seconds +3: +3: Time to load utils op: 0.0013928413391113281 seconds +3: Time to load utils op: 0.00141143798828125 seconds +2: Time to load utils op: 0.0008397102355957031 seconds +2: Time to load utils op: 0.0009179115295410156 seconds +5: Time to load utils op: 0.0006058216094970703 seconds +2: Time to load utils op: 0.0011081695556640625 seconds +2: Time to load utils op: 0.0010302066802978516 seconds +2: Time to load utils op: 0.001026153564453125 seconds +2: Time to load utils op: 0.0010328292846679688 seconds +2: Time to load utils op: 0.001007080078125 seconds +2: Time to load utils op: 0.0011751651763916016 seconds +5: Time to load utils op: 0.0007567405700683594 seconds +5: Time to load utils op: 0.0009849071502685547 seconds +5: Time to load utils op: 0.0008864402770996094 seconds +5: Time to load utils op: 0.0009393692016601562 seconds +5: Time to load utils op: 
0.0011248588562011719 seconds +5: Time to load utils op: 0.00112152099609375 seconds +5: Time to load utils op: 0.0011858940124511719 seconds +6: Time to load utils op: 0.0009453296661376953 seconds +6: Time to load utils op: 0.0009875297546386719 seconds +6: Time to load utils op: 0.0011107921600341797 seconds +6: Time to load utils op: 0.0011429786682128906 seconds +6: Time to load utils op: 0.0011670589447021484 seconds +6: Time to load utils op: 0.001207590103149414 seconds +6: Time to load utils op: 0.001138925552368164 seconds +6: Time to load utils op: 0.0012574195861816406 seconds +1: Time to load utils op: 0.0011992454528808594 seconds +1: Time to load utils op: 0.0012249946594238281 seconds +1: Time to load utils op: 0.0015213489532470703 seconds +1: Time to load utils op: 0.0015521049499511719 seconds +1: Time to load utils op: 0.0015511512756347656 seconds +1: Time to load utils op: 0.00157928466796875 secondsTime to load utils op: 0.001569986343383789 seconds +1: +1: Time to load utils op: 0.001605987548828125 seconds +0: [2023-02-28 00:02:56,585] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 +0: [2023-02-28 00:02:56,586] [INFO] [utils.py:828:see_memory_usage] MA 0.78 GB Max_MA 0.78 GB CA 0.88 GB Max_CA 1 GB +0: [2023-02-28 00:02:56,587] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.96 GB, percent = 6.2% +0: [2023-02-28 00:02:56,705] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 +0: [2023-02-28 00:02:56,706] [INFO] [utils.py:828:see_memory_usage] MA 1.68 GB Max_MA 1.68 GB CA 2.21 GB Max_CA 2 GB +0: [2023-02-28 00:02:56,706] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.96 GB, percent = 6.2% +0: [2023-02-28 00:02:56,809] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 +0: [2023-02-28 00:02:56,810] [INFO] [utils.py:828:see_memory_usage] MA 1.68 GB Max_MA 1.68 GB CA 2.21 GB Max_CA 2 GB +0: [2023-02-28 00:02:56,810] [INFO] 
[utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.96 GB, percent = 6.2% +0: [2023-02-28 00:02:56,915] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 +0: [2023-02-28 00:02:56,915] [INFO] [utils.py:828:see_memory_usage] MA 2.38 GB Max_MA 2.38 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-28 00:02:56,915] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.96 GB, percent = 6.2% +0: [2023-02-28 00:02:57,018] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 +0: [2023-02-28 00:02:57,019] [INFO] [utils.py:828:see_memory_usage] MA 2.38 GB Max_MA 2.38 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-28 00:02:57,019] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.96 GB, percent = 6.2% +0: [2023-02-28 00:02:57,124] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 +0: [2023-02-28 00:02:57,125] [INFO] [utils.py:828:see_memory_usage] MA 2.38 GB Max_MA 2.38 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-28 00:02:57,125] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.96 GB, percent = 6.2% +0: [2023-02-28 00:02:57,228] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer +0: [2023-02-28 00:02:57,228] [INFO] [utils.py:828:see_memory_usage] MA 2.38 GB Max_MA 2.38 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-28 00:02:57,229] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.97 GB, percent = 6.2% +0: [2023-02-28 00:02:57,336] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer +0: [2023-02-28 00:02:57,337] [INFO] [utils.py:828:see_memory_usage] MA 2.43 GB Max_MA 2.43 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-28 00:02:57,337] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.96 GB, percent = 6.2% +0: [2023-02-28 00:02:57,440] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer +0: [2023-02-28 00:02:57,440] [INFO] [utils.py:828:see_memory_usage] MA 2.43 GB Max_MA 2.43 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-28 00:02:57,440] 
[INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.96 GB, percent = 6.2% +0: [2023-02-28 00:02:57,440] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +0: [2023-02-28 00:02:57,441] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler +0: [2023-02-28 00:02:57,441] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +0: [2023-02-28 00:02:57,441] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +0: [2023-02-28 00:02:57,441] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: +0: [2023-02-28 00:02:57,441] [INFO] [config.py:1011:print] activation_checkpointing_config { +0: "partition_activations": false, +0: "contiguous_memory_optimization": false, +0: "cpu_checkpointing": false, +0: "number_checkpoints": null, +0: "synchronize_checkpoint_boundary": false, +0: "profile": false +0: } +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] amp_enabled .................. False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] amp_params ................... False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] autotuning_config ............ 
{ +0: "enabled": false, +0: "start_step": null, +0: "end_step": null, +0: "metric_path": null, +0: "arg_mappings": null, +0: "metric": "throughput", +0: "model_info": null, +0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", +0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", +0: "overwrite": true, +0: "fast": true, +0: "start_profile_step": 3, +0: "end_profile_step": 5, +0: "tuner_type": "gridsearch", +0: "tuner_early_stopping": 5, +0: "tuner_num_trials": 50, +0: "model_info_path": null, +0: "mp_size": 1, +0: "max_train_batch_size": null, +0: "min_train_batch_size": 1, +0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, +0: "min_train_micro_batch_size_per_gpu": 1, +0: "num_tuning_micro_batch_sizes": 3 +0: } +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] bfloat16_enabled ............. True +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] comms_config ................. +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] communication_data_type ...... None +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] compression_config ........... 
{'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa +0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] curriculum_enabled ........... False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] curriculum_params ............ False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] dataloader_drop_last ......... False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] disable_allgather ............ False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] dump_state ................... False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ 
bert.encoder.layer +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] elasticity_enabled ........... False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] flops_profiler_config ........ { +0: "enabled": false, +0: "profile_step": 1, +0: "module_depth": -1, +0: "top_modules": 1, +0: "detailed": true, +0: "output_file": null +0: } +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] fp16_auto_cast ............... None +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] fp16_enabled ................. False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False +0: [2023-02-28 00:02:57,442] [INFO] [config.py:1011:print] global_rank .................. 0 +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 1 +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] load_universal_checkpoint .... False +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] loss_scale ................... 1.0 +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] memory_breakdown ............. False +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] monitor_config ............... 
+0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] nebula_config ................ { +0: "enabled": false, +0: "persistent_storage_path": null, +0: "persistent_time_interval": 100, +0: "num_of_version_in_retention": 2, +0: "enable_nebula_load": true, +0: "load_path": null +0: } +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] optimizer_name ............... None +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] optimizer_params ............. None +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] pld_enabled .................. False +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] pld_params ................... False +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] prescale_gradients ........... False +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] scheduler_name ............... None +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] scheduler_params ............. None +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] sparse_attention ............. None +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] steps_per_print .............. 2000 +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] train_batch_size ............. 256 +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 4 +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] use_node_local_storage ....... False +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] wall_clock_breakdown ......... 
False +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] world_size ................... 64 +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] zero_enabled ................. False +0: [2023-02-28 00:02:57,443] [INFO] [config.py:1011:print] zero_optimization_stage ...... 
0 +0: [2023-02-28 00:02:57,443] [INFO] [config.py:996:print_user_config] json = { +0: "train_micro_batch_size_per_gpu": 4, +0: "train_batch_size": 256, +0: "gradient_clipping": 1.0, +0: "zero_optimization": { +0: "stage": 0 +0: }, +0: "bf16": { +0: "enabled": true +0: }, +0: "steps_per_print": 2.000000e+03, +0: "wall_clock_breakdown": false +0: } +0: Time to load utils op: 0.0004303455352783203 seconds +0: [2023-02-28 00:02:57,444] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=1 micro_batch_size=4 +0: [2023-02-28 00:02:57,456] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=25 [0, 25) STAGE_PARAMS=421207040 (421.207M) TOTAL_PARAMS=421207040 (421.207M) UNIQUE_PARAMS=421207040 (421.207M) +6: [2023-02-28 00:02:57,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-28 00:02:57,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-28 00:02:57,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-28 00:02:57,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-28 00:02:57,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-28 00:02:57,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-28 00:02:57,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+6: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+3: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+0: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+4: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+4: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,479] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+5: [2023-02-28 00:02:57,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. 
+7: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+7: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... 
+0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. 
+4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-28 00:02:57,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... 
+4: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. 
+1: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... 
+1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+4: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. 
+5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. 
+5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. 
+2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... 
+2: [2023-02-28 00:02:57,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. 
+5: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. 
+6: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... 
+3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. 
+5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. 
+3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. 
+3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... 
+3: [2023-02-28 00:02:57,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+4: [2023-02-28 00:02:57,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+4: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+1: [2023-02-28 00:02:57,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,751] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+2: [2023-02-28 00:02:57,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+5: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+6: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-28 00:02:57,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-28 00:02:57,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... 
+4: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... 
+7: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-28 00:02:57,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-28 00:02:57,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-28 00:02:57,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-28 00:02:57,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-28 00:02:57,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... 
+7: [2023-02-28 00:02:57,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-28 00:02:57,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-28 00:02:57,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-28 00:02:57,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-28 00:02:57,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-28 00:02:57,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... 
+6: [2023-02-28 00:02:57,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-28 00:02:57,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-28 00:02:57,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-28 00:02:57,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-28 00:02:57,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... 
+5: [2023-02-28 00:02:57,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-28 00:02:57,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-28 00:02:57,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-28 00:02:57,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-28 00:02:57,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-28 00:02:57,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-28 00:02:57,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-28 00:02:57,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... 
+1: [2023-02-28 00:02:57,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-28 00:02:57,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-28 00:02:57,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-28 00:02:57,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-28 00:02:57,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-28 00:02:57,792] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+3: [2023-02-28 00:02:57,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+7: [2023-02-28 00:02:57,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+3: [2023-02-28 00:02:57,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-28 00:02:57,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-28 00:02:57,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-28 00:02:57,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-28 00:02:57,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+6: [2023-02-28 00:02:57,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-28 00:02:57,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-28 00:02:57,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+3: [2023-02-28 00:02:57,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-28 00:02:57,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-28 00:02:57,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+5: [2023-02-28 00:02:57,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,820] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-28 00:02:57,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. 
+1: [2023-02-28 00:02:57,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-28 00:02:57,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-28 00:02:57,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-28 00:02:57,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-28 00:02:57,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... 
+3: [2023-02-28 00:02:57,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-28 00:02:57,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-28 00:02:57,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... 
+4: [2023-02-28 00:02:57,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... 
+4: [2023-02-28 00:02:57,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,838] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-28 00:02:57,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-28 00:02:57,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-28 00:02:57,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-28 00:02:57,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... 
+1: [2023-02-28 00:02:57,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-28 00:02:57,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-28 00:02:57,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... 
+2: [2023-02-28 00:02:57,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-28 00:02:57,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. 
+7: [2023-02-28 00:02:57,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. 
+3: [2023-02-28 00:02:57,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. 
+2: [2023-02-28 00:02:57,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. 
+0: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... 
+3: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-28 00:02:57,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-28 00:02:57,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-28 00:02:57,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. 
+2: [2023-02-28 00:02:57,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. 
+0: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. 
+4: [2023-02-28 00:02:57,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... 
+4: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... 
+6: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-28 00:02:57,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... 
+5: [2023-02-28 00:02:57,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-28 00:02:57,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... 
+1: [2023-02-28 00:02:57,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-28 00:02:57,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-28 00:02:57,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. 
+2: [2023-02-28 00:02:57,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. 
+3: [2023-02-28 00:02:57,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-28 00:02:57,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-28 00:02:57,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+6: [2023-02-28 00:02:57,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-28 00:02:57,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-28 00:02:57,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-28 00:02:57,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-28 00:02:57,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-28 00:02:57,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-28 00:02:57,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:57,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:57,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:57,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-28 00:02:57,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+3: [2023-02-28 00:02:57,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-28 00:02:57,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+0: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-28 00:02:57,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. 
+1: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-28 00:02:57,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:57,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:57,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-28 00:02:58,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+3: [2023-02-28 00:02:58,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:58,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:58,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:58,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:58,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-28 00:02:58,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-28 00:02:58,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+2: [2023-02-28 00:02:58,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-28 00:02:58,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-28 00:02:58,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:57,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... 
+7: [2023-02-28 00:02:57,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-28 00:02:57,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:57,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. 
+7: [2023-02-28 00:02:57,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-28 00:02:57,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:57,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:57,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:57,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-28 00:02:58,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-28 00:02:58,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+0: [2023-02-28 00:02:58,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-28 00:02:58,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+5: [2023-02-28 00:02:58,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+0: [2023-02-28 00:02:58,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-28 00:02:58,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-28 00:02:58,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+0: [2023-02-28 00:02:58,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:58,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-28 00:02:58,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-28 00:02:58,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-28 00:02:58,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:58,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:58,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:58,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:58,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:58,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:58,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-28 00:02:58,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. 
+7: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. 
+7: [2023-02-28 00:02:58,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+6: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+5: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. 
+4: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+2: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-28 00:02:58,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+4: [2023-02-28 00:02:58,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+5: [2023-02-28 00:02:58,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-28 00:02:58,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-28 00:02:58,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-28 00:02:58,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,165] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. 
+0: [2023-02-28 00:02:58,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-28 00:02:58,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-28 00:02:58,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-28 00:02:58,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-28 00:02:58,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-28 00:02:58,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. 
+5: [2023-02-28 00:02:58,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-28 00:02:58,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. 
+0: [2023-02-28 00:02:58,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-28 00:02:58,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-28 00:02:58,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-28 00:02:58,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. 
+2: [2023-02-28 00:02:58,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-28 00:02:58,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-28 00:02:58,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-28 00:02:58,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-28 00:02:58,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... 
+3: [2023-02-28 00:02:58,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-28 00:02:58,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-28 00:02:58,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-28 00:02:58,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. 
+6: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-28 00:02:58,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-28 00:02:58,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-28 00:02:58,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-28 00:02:58,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... 
+7: [2023-02-28 00:02:58,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... 
+0: [2023-02-28 00:02:58,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,183] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,183] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,183] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,183] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... 
+5: [2023-02-28 00:02:58,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,185] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... 
+1: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-28 00:02:58,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. 
+1: [2023-02-28 00:02:58,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-28 00:02:58,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... 
+1: [2023-02-28 00:02:58,182] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... 
+6: [2023-02-28 00:02:58,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-28 00:02:58,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... 
+7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... 
+6: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,298] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... 
+2: [2023-02-28 00:02:58,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-28 00:02:58,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,299] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... 
+3: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... 
+2: [2023-02-28 00:02:58,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. 
+2: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. 
+1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. 
+5: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-28 00:02:58,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... 
+5: [2023-02-28 00:02:58,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-28 00:02:58,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-28 00:02:58,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,306] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-28 00:02:58,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-28 00:02:58,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. 
+4: [2023-02-28 00:02:58,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,311] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... 
+0: [2023-02-28 00:02:58,311] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. 
+7: [2023-02-28 00:02:58,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-28 00:02:58,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-28 00:02:58,315] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,316] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. 
+0: [2023-02-28 00:02:58,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,316] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-28 00:02:58,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,317] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... 
+6: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-28 00:02:58,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... 
+7: [2023-02-28 00:02:58,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,321] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-28 00:02:58,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... 
+1: [2023-02-28 00:02:58,322] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-28 00:02:58,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-28 00:02:58,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-28 00:02:58,323] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-28 00:02:58,323] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... 
+2: [2023-02-28 00:02:58,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-28 00:02:58,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-28 00:02:58,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. 
+1: [2023-02-28 00:02:58,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-28 00:02:58,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-28 00:02:58,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-28 00:02:58,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... 
+0: [2023-02-28 00:02:58,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... 
+1: [2023-02-28 00:02:58,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+0: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+2: [2023-02-28 00:02:58,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-28 00:02:58,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... 
+2: [2023-02-28 00:02:58,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-28 00:02:58,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-28 00:02:58,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-28 00:02:58,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-28 00:02:58,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-28 00:02:58,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+1: [2023-02-28 00:02:58,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-28 00:02:58,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,494] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... 
+4: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+0: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+7: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+6: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... 
+1: [2023-02-28 00:02:58,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-28 00:02:58,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... 
+5: [2023-02-28 00:02:58,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-28 00:02:58,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-28 00:02:58,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-28 00:02:58,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-28 00:02:58,499] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... 
+0: [2023-02-28 00:02:58,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-28 00:02:58,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-28 00:02:58,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-28 00:02:58,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,504] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+0: [2023-02-28 00:02:58,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-28 00:02:58,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,507] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... 
+7: [2023-02-28 00:02:58,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-28 00:02:58,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-28 00:02:58,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+7: [2023-02-28 00:02:58,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-28 00:02:58,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,510] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-28 00:02:58,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... 
+2: [2023-02-28 00:02:58,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-28 00:02:58,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-28 00:02:58,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,514] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... 
+2: [2023-02-28 00:02:58,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,515] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+0: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-28 00:02:58,516] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+0: [2023-02-28 00:02:58,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-28 00:02:58,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-28 00:02:58,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-28 00:02:58,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-28 00:02:58,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-28 00:02:58,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,517] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,518] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-28 00:02:58,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-28 00:02:58,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... 
+0: [2023-02-28 00:02:58,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,522] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... 
+7: [2023-02-28 00:02:58,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-28 00:02:58,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-28 00:02:58,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. 
+5: [2023-02-28 00:02:58,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-28 00:02:58,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-28 00:02:58,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,536] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... 
+3: [2023-02-28 00:02:58,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-28 00:02:58,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+2: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... 
+5: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... 
+7: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... 
+1: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+2: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+1: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... 
+4: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+6: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... 
+6: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+4: [2023-02-28 00:02:58,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-28 00:02:58,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-28 00:02:58,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,651] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-28 00:02:58,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-28 00:02:58,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-28 00:02:58,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... 
+4: [2023-02-28 00:02:58,652] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-28 00:02:58,653] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-28 00:02:58,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,656] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+6: [2023-02-28 00:02:58,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-28 00:02:58,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-28 00:02:58,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+2: [2023-02-28 00:02:58,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-28 00:02:58,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-28 00:02:58,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-28 00:02:58,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+6: [2023-02-28 00:02:58,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-28 00:02:58,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+5: [2023-02-28 00:02:58,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-28 00:02:58,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... 
+4: [2023-02-28 00:02:58,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-28 00:02:58,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... 
+5: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-28 00:02:58,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-28 00:02:58,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-28 00:02:58,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+7: [2023-02-28 00:02:58,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-28 00:02:58,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-28 00:02:58,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-28 00:02:58,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-28 00:02:58,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... 
+4: [2023-02-28 00:02:58,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-28 00:02:58,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-28 00:02:58,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-28 00:02:58,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-28 00:02:58,671] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-28 00:02:58,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+5: [2023-02-28 00:02:58,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-28 00:02:58,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. 
+0: [2023-02-28 00:02:58,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-28 00:02:58,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,677] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... 
+4: [2023-02-28 00:02:58,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-28 00:02:58,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-28 00:02:58,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-28 00:02:58,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-28 00:02:58,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-28 00:02:58,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... 
+6: [2023-02-28 00:02:58,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-28 00:02:58,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-28 00:02:58,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-28 00:02:58,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-28 00:02:58,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-28 00:02:58,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-28 00:02:58,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+1: [2023-02-28 00:02:58,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-28 00:02:58,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-28 00:02:58,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-28 00:02:58,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-28 00:02:58,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+0: [2023-02-28 00:02:58,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+0: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... 
+3: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-28 00:02:58,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... 
+6: [2023-02-28 00:02:58,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-28 00:02:58,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-28 00:02:58,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+7: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... 
+7: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-28 00:02:58,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... 
+7: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-28 00:02:58,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-28 00:02:58,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+4: [2023-02-28 00:02:58,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-28 00:02:58,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-28 00:02:58,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+4: [2023-02-28 00:02:58,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-28 00:02:58,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+1: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... 
+1: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-28 00:02:58,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-28 00:02:58,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-28 00:02:58,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-28 00:02:58,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+6: [2023-02-28 00:02:58,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-28 00:02:58,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+7: [2023-02-28 00:02:58,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-28 00:02:58,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-28 00:02:58,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-28 00:02:58,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-28 00:02:58,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. 
+2: [2023-02-28 00:02:58,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-28 00:02:58,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+4: [2023-02-28 00:02:58,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-28 00:02:58,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+7: [2023-02-28 00:02:58,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+2: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+4: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-28 00:02:58,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-28 00:02:58,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-28 00:02:58,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-28 00:02:58,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-28 00:02:58,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+5: [2023-02-28 00:02:58,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-28 00:02:58,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-28 00:02:58,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-28 00:02:58,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+3: [2023-02-28 00:02:58,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-28 00:02:58,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-28 00:02:58,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-28 00:02:58,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-28 00:02:58,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. 
+1: [2023-02-28 00:02:58,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-28 00:02:58,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-28 00:02:58,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-28 00:02:58,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. 
+5: [2023-02-28 00:02:58,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:58,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,937] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+5: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+6: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-28 00:02:58,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+0: [2023-02-28 00:02:58,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,940] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+5: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+2: [2023-02-28 00:02:58,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-28 00:02:58,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-28 00:02:58,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-28 00:02:58,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. 
+1: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. 
+4: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-28 00:02:58,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+4: [2023-02-28 00:02:58,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-28 00:02:58,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-28 00:02:58,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-28 00:02:58,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. 
+2: [2023-02-28 00:02:58,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-28 00:02:58,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-28 00:02:58,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-28 00:02:58,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-28 00:02:58,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. 
+7: [2023-02-28 00:02:58,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-28 00:02:58,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-28 00:02:58,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-28 00:02:58,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-28 00:02:58,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-28 00:02:58,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt... 
+6: [2023-02-28 00:02:58,948] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,949] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-28 00:02:58,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:58,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:58,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:58,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:58,950] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:58,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,951] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,951] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. 
+3: [2023-02-28 00:02:58,951] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,951] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,951] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,951] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,951] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,951] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:58,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:58,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. 
+2: [2023-02-28 00:02:58,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:58,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:58,955] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:58,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-28 00:02:58,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. 
+4: [2023-02-28 00:02:58,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:58,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:58,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:58,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-28 00:02:58,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+6: [2023-02-28 00:02:58,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:58,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-28 00:02:58,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-28 00:02:58,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:58,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:58,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+7: [2023-02-28 00:02:58,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-28 00:02:58,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:58,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:58,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:58,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-28 00:02:58,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:58,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:58,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+6: [2023-02-28 00:02:58,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:58,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-28 00:02:58,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-28 00:02:58,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-28 00:02:58,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:58,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:58,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-28 00:02:58,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:58,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:58,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. 
+5: [2023-02-28 00:02:58,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:58,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-28 00:02:58,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-28 00:02:58,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-28 00:02:58,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-28 00:02:58,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:58,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:58,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+5: [2023-02-28 00:02:58,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:58,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:58,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:58,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-28 00:02:58,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:58,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:58,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:58,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+0: [2023-02-28 00:02:58,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:58,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:58,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:58,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:58,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:58,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:58,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-28 00:02:58,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:58,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+4: [2023-02-28 00:02:58,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:58,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-28 00:02:58,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:58,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:58,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:58,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:58,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:58,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-28 00:02:58,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-28 00:02:58,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+0: [2023-02-28 00:02:58,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:59,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:59,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:59,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:59,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:59,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+1: [2023-02-28 00:02:59,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:59,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:59,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:59,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:59,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. 
+0: [2023-02-28 00:02:59,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:59,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:59,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:59,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+3: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+3: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:59,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:59,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-28 00:02:59,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:59,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-28 00:02:59,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. 
+6: [2023-02-28 00:02:59,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+5: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:59,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. 
+7: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. 
+7: [2023-02-28 00:02:59,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-28 00:02:59,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-28 00:02:59,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:59,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:59,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:59,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-28 00:02:59,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... 
+3: [2023-02-28 00:02:59,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-28 00:02:59,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-28 00:02:59,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-28 00:02:59,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. 
+3: [2023-02-28 00:02:59,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-28 00:02:59,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. 
+4: [2023-02-28 00:02:59,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-28 00:02:59,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-28 00:02:59,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. 
+7: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,109] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. 
+5: [2023-02-28 00:02:59,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-28 00:02:59,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-28 00:02:59,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-28 00:02:59,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-28 00:02:59,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. 
+5: [2023-02-28 00:02:59,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-28 00:02:59,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... 
+4: [2023-02-28 00:02:59,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-28 00:02:59,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... 
+4: [2023-02-28 00:02:59,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-28 00:02:59,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-28 00:02:59,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-28 00:02:59,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. 
+5: [2023-02-28 00:02:59,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-28 00:02:59,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... 
+4: [2023-02-28 00:02:59,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-28 00:02:59,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,108] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-28 00:02:59,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... 
+2: [2023-02-28 00:02:59,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,278] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,279] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. 
+3: [2023-02-28 00:02:59,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,279] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,280] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,280] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-28 00:02:59,281] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-28 00:02:59,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,281] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,282] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. 
+7: [2023-02-28 00:02:59,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-28 00:02:59,283] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,283] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... 
+4: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-28 00:02:59,284] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,285] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. 
+4: [2023-02-28 00:02:59,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,286] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,286] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-28 00:02:59,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... 
+2: [2023-02-28 00:02:59,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,287] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,287] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-28 00:02:59,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-28 00:02:59,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,288] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,288] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... 
+2: [2023-02-28 00:02:59,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,289] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-28 00:02:59,289] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. 
+5: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. 
+3: [2023-02-28 00:02:59,290] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,291] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,291] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. 
+5: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,292] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,293] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,293] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-28 00:02:59,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. 
+6: [2023-02-28 00:02:59,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-28 00:02:59,294] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,294] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,295] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-28 00:02:59,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,295] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-28 00:02:59,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,296] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,296] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,297] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. 
+3: [2023-02-28 00:02:59,297] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-28 00:02:59,298] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-28 00:02:59,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,299] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,300] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,300] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,301] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-28 00:02:59,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,301] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,302] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,303] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,303] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-28 00:02:59,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,304] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,304] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. 
+3: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,305] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. 
+7: [2023-02-28 00:02:59,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-28 00:02:59,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,306] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+2: [2023-02-28 00:02:59,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,309] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,311] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,311] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+5: [2023-02-28 00:02:59,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-28 00:02:59,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-28 00:02:59,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,311] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-28 00:02:59,311] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,312] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,312] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+6: [2023-02-28 00:02:59,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,313] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-28 00:02:59,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,313] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,314] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-28 00:02:59,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,314] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+2: [2023-02-28 00:02:59,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,315] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,317] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+5: [2023-02-28 00:02:59,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,319] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,322] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-28 00:02:59,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,439] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+6: [2023-02-28 00:02:59,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,440] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,441] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+6: [2023-02-28 00:02:59,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+1: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+3: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... 
+4: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,446] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+4: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-28 00:02:59,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-28 00:02:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-28 00:02:59,448] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-28 00:02:59,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-28 00:02:59,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-28 00:02:59,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+4: [2023-02-28 00:02:59,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-28 00:02:59,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-28 00:02:59,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-28 00:02:59,451] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-28 00:02:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+0: [2023-02-28 00:02:59,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-28 00:02:59,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,457] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-28 00:02:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,458] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-28 00:02:59,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,460] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-28 00:02:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+6: [2023-02-28 00:02:59,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-28 00:02:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-28 00:02:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+4: [2023-02-28 00:02:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,463] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,463] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-28 00:02:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-28 00:02:59,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-28 00:02:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-28 00:02:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... 
+1: [2023-02-28 00:02:59,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,465] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-28 00:02:59,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,467] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+4: [2023-02-28 00:02:59,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,467] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,468] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-28 00:02:59,469] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-28 00:02:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... 
+6: [2023-02-28 00:02:59,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,470] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,471] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,471] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-28 00:02:59,472] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-28 00:02:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,472] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,473] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. 
+0: [2023-02-28 00:02:59,475] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,476] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-28 00:02:59,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,477] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,478] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-28 00:02:59,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... 
+5: [2023-02-28 00:02:59,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-28 00:02:59,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-28 00:02:59,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-28 00:02:59,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. 
+7: [2023-02-28 00:02:59,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-28 00:02:59,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. 
+1: [2023-02-28 00:02:59,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-28 00:02:59,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-28 00:02:59,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-28 00:02:59,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. 
+7: [2023-02-28 00:02:59,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-28 00:02:59,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-28 00:02:59,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,620] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-28 00:02:59,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... 
+6: [2023-02-28 00:02:59,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-28 00:02:59,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-28 00:02:59,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. 
+6: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. 
+1: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. 
+7: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... 
+6: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... 
+4: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-28 00:02:59,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-28 00:02:59,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-28 00:02:59,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-28 00:02:59,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. 
+7: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-28 00:02:59,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-28 00:02:59,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. 
+3: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-28 00:02:59,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-28 00:02:59,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-28 00:02:59,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-28 00:02:59,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt... 
+1: [2023-02-28 00:02:59,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,636] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-28 00:02:59,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-28 00:02:59,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,639] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-28 00:02:59,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-28 00:02:59,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-28 00:02:59,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... 
+2: [2023-02-28 00:02:59,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,643] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-28 00:02:59,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... 
+4: [2023-02-28 00:02:59,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-28 00:02:59,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,645] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-28 00:02:59,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. 
+4: [2023-02-28 00:02:59,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,646] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,646] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-28 00:02:59,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-28 00:02:59,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-28 00:02:59,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-28 00:02:59,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-28 00:02:59,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... 
+3: [2023-02-28 00:02:59,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-28 00:02:59,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-28 00:02:59,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-28 00:02:59,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-28 00:02:59,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-28 00:02:59,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,651] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-28 00:02:59,651] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... 
+4: [2023-02-28 00:02:59,651] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-28 00:02:59,651] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,652] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,654] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... 
+3: [2023-02-28 00:02:59,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-28 00:02:59,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-28 00:02:59,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-28 00:02:59,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-28 00:02:59,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-28 00:02:59,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-28 00:02:59,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-28 00:02:59,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-28 00:02:59,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-28 00:02:59,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-28 00:02:59,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-28 00:02:59,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-28 00:02:59,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+0: [2023-02-28 00:02:59,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-28 00:02:59,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-28 00:02:59,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-28 00:02:59,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-28 00:02:59,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-28 00:02:59,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-28 00:02:59,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-28 00:02:59,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... 
+3: [2023-02-28 00:02:59,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-28 00:02:59,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-28 00:02:59,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-28 00:02:59,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,811] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+3: [2023-02-28 00:02:59,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-28 00:02:59,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-28 00:02:59,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-28 00:02:59,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-28 00:02:59,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-28 00:02:59,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-28 00:02:59,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-28 00:02:59,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-28 00:02:59,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-28 00:02:59,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-28 00:02:59,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-28 00:02:59,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-28 00:02:59,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+3: [2023-02-28 00:02:59,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-28 00:02:59,823] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-28 00:02:59,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-28 00:02:59,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-28 00:02:59,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+6: [2023-02-28 00:02:59,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-28 00:02:59,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-28 00:02:59,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-28 00:02:59,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-28 00:02:59,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... 
+3: [2023-02-28 00:02:59,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-28 00:02:59,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-28 00:02:59,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+5: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+7: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-28 00:02:59,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-28 00:02:59,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-28 00:02:59,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-28 00:02:59,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-28 00:02:59,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+4: [2023-02-28 00:02:59,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+5: [2023-02-28 00:02:59,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-28 00:02:59,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-28 00:02:59,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+3: [2023-02-28 00:02:59,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-28 00:02:59,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-28 00:02:59,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-28 00:02:59,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-28 00:02:59,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+0: [2023-02-28 00:02:59,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-28 00:02:59,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-28 00:02:59,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-28 00:02:59,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+1: [2023-02-28 00:02:59,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-28 00:02:59,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-28 00:02:59,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-28 00:02:59,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-28 00:02:59,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-28 00:02:59,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-28 00:02:59,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-28 00:02:59,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,847] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+6: [2023-02-28 00:02:59,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-28 00:02:59,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+5: [2023-02-28 00:02:59,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-28 00:02:59,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-28 00:02:59,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-28 00:02:59,859] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+4: [2023-02-28 00:02:59,863] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,866] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-28 00:02:59,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,951] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. 
+0: [2023-02-28 00:02:59,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-28 00:02:59,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-28 00:02:59,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-28 00:02:59,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. 
+6: [2023-02-28 00:02:59,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-28 00:02:59,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:02:59,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+6: [2023-02-28 00:02:59,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-28 00:02:59,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-28 00:02:59,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-28 00:02:59,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:02:59,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:02:59,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+1: [2023-02-28 00:02:59,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+3: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. 
+3: [2023-02-28 00:02:59,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-28 00:02:59,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:02:59,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:02:59,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:02:59,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:02:59,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:02:59,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:02:59,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:02:59,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-28 00:02:59,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+7: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+4: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+0: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... 
+1: [2023-02-28 00:02:59,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-28 00:02:59,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:02:59,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-28 00:02:59,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-28 00:02:59,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. 
+6: [2023-02-28 00:02:59,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:02:59,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:02:59,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:02:59,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:02:59,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. 
+0: [2023-02-28 00:02:59,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-28 00:02:59,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:02:59,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-28 00:02:59,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:02:59,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:02:59,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:02:59,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:02:59,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:02:59,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:02:59,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... 
+6: [2023-02-28 00:02:59,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:02:59,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:02:59,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:02:59,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:02:59,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-28 00:02:59,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-28 00:02:59,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-28 00:02:59,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:02:59,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:02:59,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. 
+2: [2023-02-28 00:02:59,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:02:59,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:02:59,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:02:59,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:02:59,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:02:59,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:02:59,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:02:59,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... 
+3: [2023-02-28 00:02:59,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:02:59,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:02:59,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:02:59,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-28 00:02:59,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-28 00:02:59,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-28 00:02:59,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-28 00:02:59,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:02:59,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. 
+4: [2023-02-28 00:02:59,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-28 00:02:59,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-28 00:02:59,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:02:59,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:02:59,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-28 00:02:59,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:02:59,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:02:59,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. 
+5: [2023-02-28 00:02:59,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:02:59,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:03:00,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-28 00:03:00,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:03:00,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-28 00:03:00,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-28 00:03:00,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:03:00,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:03:00,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:03:00,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... 
+1: [2023-02-28 00:03:00,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:03:00,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:03:00,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:03:00,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:03:00,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:03:00,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:03:00,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:03:00,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:03:00,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:03:00,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... 
+7: [2023-02-28 00:03:00,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-28 00:03:00,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:03:00,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:03:00,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-28 00:03:00,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:03:00,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. 
+3: [2023-02-28 00:03:00,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-28 00:03:00,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-28 00:03:00,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:03:00,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:03:00,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-28 00:03:00,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-28 00:03:00,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+0: [2023-02-28 00:03:00,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-28 00:03:00,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-28 00:03:00,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-28 00:03:00,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,068] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-28 00:03:00,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-28 00:03:00,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... 
+0: [2023-02-28 00:03:00,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-28 00:03:00,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:03:00,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+3: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+6: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,073] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... 
+6: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... 
+5: [2023-02-28 00:03:00,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:03:00,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-28 00:03:00,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:03:00,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... 
+1: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... 
+4: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:03:00,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:03:00,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... 
+0: [2023-02-28 00:03:00,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,077] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:03:00,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:03:00,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:03:00,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:03:00,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... 
+4: [2023-02-28 00:03:00,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:03:00,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-28 00:03:00,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:03:00,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,080] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+7: [2023-02-28 00:03:00,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:03:00,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:03:00,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:03:00,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-28 00:03:00,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,083] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+6: [2023-02-28 00:03:00,084] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,085] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,086] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,087] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-28 00:03:00,087] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+0: [2023-02-28 00:03:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,089] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+3: [2023-02-28 00:03:00,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-28 00:03:00,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-28 00:03:00,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-28 00:03:00,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+7: [2023-02-28 00:03:00,090] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-28 00:03:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-28 00:03:00,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-28 00:03:00,091] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+7: [2023-02-28 00:03:00,092] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-28 00:03:00,092] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-28 00:03:00,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-28 00:03:00,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,093] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,094] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+0: [2023-02-28 00:03:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,094] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-28 00:03:00,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+6: [2023-02-28 00:03:00,095] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-28 00:03:00,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-28 00:03:00,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,096] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... 
+6: [2023-02-28 00:03:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-28 00:03:00,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-28 00:03:00,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,098] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+4: [2023-02-28 00:03:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-28 00:03:00,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-28 00:03:00,100] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... 
+2: [2023-02-28 00:03:00,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-28 00:03:00,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,103] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,103] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-28 00:03:00,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-28 00:03:00,104] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... 
+1: [2023-02-28 00:03:00,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,105] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,106] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. 
+1: [2023-02-28 00:03:00,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,108] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-28 00:03:00,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,110] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-28 00:03:00,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... 
+2: [2023-02-28 00:03:00,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,115] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. 
+0: [2023-02-28 00:03:00,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,233] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... 
+3: [2023-02-28 00:03:00,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,234] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. 
+3: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,235] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. 
+1: [2023-02-28 00:03:00,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,236] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. 
+2: [2023-02-28 00:03:00,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-28 00:03:00,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... 
+5: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. 
+6: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-28 00:03:00,238] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. 
+0: [2023-02-28 00:03:00,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,239] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... 
+7: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. 
+1: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... 
+4: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-28 00:03:00,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-28 00:03:00,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-28 00:03:00,241] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... 
+1: [2023-02-28 00:03:00,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-28 00:03:00,242] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,242] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,242] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-28 00:03:00,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-28 00:03:00,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-28 00:03:00,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. 
+3: [2023-02-28 00:03:00,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+6: [2023-02-28 00:03:00,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-28 00:03:00,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,251] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+5: [2023-02-28 00:03:00,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. 
+6: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. 
+6: [2023-02-28 00:03:00,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+5: [2023-02-28 00:03:00,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-28 00:03:00,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+6: [2023-02-28 00:03:00,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-28 00:03:00,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,259] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+0: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-28 00:03:00,260] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-28 00:03:00,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,261] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-28 00:03:00,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+7: [2023-02-28 00:03:00,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-28 00:03:00,263] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-28 00:03:00,264] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-28 00:03:00,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,265] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. 
+1: [2023-02-28 00:03:00,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,267] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-28 00:03:00,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+4: [2023-02-28 00:03:00,268] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-28 00:03:00,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,269] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,270] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-28 00:03:00,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,270] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,271] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,271] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-28 00:03:00,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+4: [2023-02-28 00:03:00,272] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,273] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,276] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,278] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. 
+0: [2023-02-28 00:03:00,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. 
+3: [2023-02-28 00:03:00,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. 
+7: [2023-02-28 00:03:00,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. 
+7: [2023-02-28 00:03:00,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+1: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+7: [2023-02-28 00:03:00,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-28 00:03:00,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-28 00:03:00,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-28 00:03:00,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-28 00:03:00,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-28 00:03:00,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. 
+4: [2023-02-28 00:03:00,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+2: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. 
+6: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-28 00:03:00,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+2: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... 
+6: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-28 00:03:00,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-28 00:03:00,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-28 00:03:00,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. 
+5: [2023-02-28 00:03:00,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-28 00:03:00,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-28 00:03:00,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-28 00:03:00,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. 
+0: [2023-02-28 00:03:00,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-28 00:03:00,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. 
+5: [2023-02-28 00:03:00,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-28 00:03:00,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-28 00:03:00,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+3: [2023-02-28 00:03:00,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-28 00:03:00,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-28 00:03:00,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. 
+6: [2023-02-28 00:03:00,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-28 00:03:00,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-28 00:03:00,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-28 00:03:00,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+0: [2023-02-28 00:03:00,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-28 00:03:00,420] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+7: [2023-02-28 00:03:00,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-28 00:03:00,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-28 00:03:00,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-28 00:03:00,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-28 00:03:00,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-28 00:03:00,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+2: [2023-02-28 00:03:00,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+3: [2023-02-28 00:03:00,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-28 00:03:00,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-28 00:03:00,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-28 00:03:00,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+6: [2023-02-28 00:03:00,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+5: [2023-02-28 00:03:00,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-28 00:03:00,427] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-28 00:03:00,427] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-28 00:03:00,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+4: [2023-02-28 00:03:00,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-28 00:03:00,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+4: [2023-02-28 00:03:00,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-28 00:03:00,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. 
+4: [2023-02-28 00:03:00,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. 
+6: [2023-02-28 00:03:00,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. 
+7: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+1: [2023-02-28 00:03:00,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,564] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. 
+1: [2023-02-28 00:03:00,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-28 00:03:00,564] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+7: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. 
+5: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. 
+5: [2023-02-28 00:03:00,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-28 00:03:00,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-28 00:03:00,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-28 00:03:00,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-28 00:03:00,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-28 00:03:00,566] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-28 00:03:00,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-28 00:03:00,566] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-28 00:03:00,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-28 00:03:00,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+0: [2023-02-28 00:03:00,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-28 00:03:00,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-28 00:03:00,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-28 00:03:00,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,567] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-28 00:03:00,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,568] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... 
+4: [2023-02-28 00:03:00,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,569] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-28 00:03:00,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,572] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,572] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,573] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,573] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,573] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. 
+4: [2023-02-28 00:03:00,573] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-28 00:03:00,573] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-28 00:03:00,573] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-28 00:03:00,574] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-28 00:03:00,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. 
+0: [2023-02-28 00:03:00,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-28 00:03:00,575] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-28 00:03:00,575] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,575] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,575] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,576] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,576] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,576] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,576] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,576] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. 
+7: [2023-02-28 00:03:00,576] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,576] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,576] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,576] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-28 00:03:00,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-28 00:03:00,577] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+1: [2023-02-28 00:03:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,578] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,578] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,579] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-28 00:03:00,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,579] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-28 00:03:00,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+6: [2023-02-28 00:03:00,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,580] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-28 00:03:00,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-28 00:03:00,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-28 00:03:00,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,581] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+2: [2023-02-28 00:03:00,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-28 00:03:00,583] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-28 00:03:00,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,583] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. 
+3: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+5: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-28 00:03:00,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-28 00:03:00,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. 
+0: [2023-02-28 00:03:00,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-28 00:03:00,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-28 00:03:00,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-28 00:03:00,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-28 00:03:00,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+3: [2023-02-28 00:03:00,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-28 00:03:00,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-28 00:03:00,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+5: [2023-02-28 00:03:00,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-28 00:03:00,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-28 00:03:00,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-28 00:03:00,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,596] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,596] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-28 00:03:00,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-28 00:03:00,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+0: [2023-02-28 00:03:00,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-28 00:03:00,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-28 00:03:00,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-28 00:03:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-28 00:03:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-28 00:03:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. 
+4: [2023-02-28 00:03:00,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+4: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+6: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-28 00:03:00,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-28 00:03:00,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-28 00:03:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-28 00:03:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-28 00:03:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. 
+1: [2023-02-28 00:03:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-28 00:03:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-28 00:03:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-28 00:03:00,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+3: [2023-02-28 00:03:00,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. 
+2: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. 
+7: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+7: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. 
+5: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. 
+7: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+5: [2023-02-28 00:03:00,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-28 00:03:00,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-28 00:03:00,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-28 00:03:00,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt... 
+1: [2023-02-28 00:03:00,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-28 00:03:00,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-28 00:03:00,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. 
+0: [2023-02-28 00:03:00,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-28 00:03:00,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... 
+6: [2023-02-28 00:03:00,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. 
+4: [2023-02-28 00:03:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. 
+7: [2023-02-28 00:03:00,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-28 00:03:00,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... 
+5: [2023-02-28 00:03:00,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... 
+6: [2023-02-28 00:03:00,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-28 00:03:00,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-28 00:03:00,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... 
+4: [2023-02-28 00:03:00,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-28 00:03:00,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-28 00:03:00,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-28 00:03:00,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-28 00:03:00,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. 
+2: [2023-02-28 00:03:00,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-28 00:03:00,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... 
+3: [2023-02-28 00:03:00,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-28 00:03:00,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... 
+5: [2023-02-28 00:03:00,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-28 00:03:00,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... 
+0: [2023-02-28 00:03:00,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,752] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+0: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+5: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,753] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... 
+7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... 
+6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... 
+6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... 
+1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+4: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,754] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+4: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+4: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... 
+2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-28 00:03:00,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+3: [2023-02-28 00:03:00,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-28 00:03:00,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,757] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-28 00:03:00,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. 
+0: > overriding learning rate value to 0.0002 +0: > overriding minimum learning rate value to 2e-05 +0: > overriding warmup iterations value to 0 +0: > overriding total number of iterations value to 1 +0: > overriding decay style value to cosine +5: [2023-02-28 00:03:00,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-28 00:03:00,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-28 00:03:00,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... 
+0: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +1: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... 
+1: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +1: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-02-28 00:03:00,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +7: [2023-02-28 00:03:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +7: [2023-02-28 00:03:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-02-28 00:03:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-02-28 00:03:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-02-28 00:03:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... 
+7: [2023-02-28 00:03:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-02-28 00:03:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-02-28 00:03:00,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +2: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +2: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... 
+2: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +4: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +6: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +4: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... 
+6: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-02-28 00:03:00,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +3: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... 
+5: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +3: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +5: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... 
+5: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-02-28 00:03:00,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +0: [2023-02-28 00:03:01,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-02-28 00:03:01,017] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 7 +0: [2023-02-28 00:03:01,020] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 7 +4: [2023-02-28 00:03:01,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +1: [2023-02-28 00:03:01,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. 
+4: [2023-02-28 00:03:01,029] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 36 +1: [2023-02-28 00:03:01,029] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 11 +4: [2023-02-28 00:03:01,032] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 36 +1: [2023-02-28 00:03:01,033] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 11 +2: [2023-02-28 00:03:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-02-28 00:03:01,037] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 18 +6: [2023-02-28 00:03:01,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-02-28 00:03:01,038] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 51 +7: [2023-02-28 00:03:01,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-02-28 00:03:01,039] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 59 +2: [2023-02-28 00:03:01,040] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 18 +6: [2023-02-28 00:03:01,041] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 51 +1: [2023-02-28 00:03:01,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. 
+1: [2023-02-28 00:03:01,041] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 9 +7: [2023-02-28 00:03:01,042] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 59 +3: [2023-02-28 00:03:01,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. +3: [2023-02-28 00:03:01,043] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 31 +1: [2023-02-28 00:03:01,045] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 9 +3: [2023-02-28 00:03:01,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-02-28 00:03:01,046] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 27 +3: [2023-02-28 00:03:01,047] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 31 +3: [2023-02-28 00:03:01,049] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 27 +4: [2023-02-28 00:03:01,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +4: [2023-02-28 00:03:01,050] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 38 +4: [2023-02-28 00:03:01,053] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 38 +0: [2023-02-28 00:03:01,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. 
+0: [2023-02-28 00:03:01,057] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 3 +1: [2023-02-28 00:03:01,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-02-28 00:03:01,058] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 15 +0: [2023-02-28 00:03:01,060] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 3 +1: [2023-02-28 00:03:01,061] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 15 +2: [2023-02-28 00:03:01,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-02-28 00:03:01,065] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 19 +2: [2023-02-28 00:03:01,068] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 19 +5: [2023-02-28 00:03:01,070] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-02-28 00:03:01,071] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 44 +1: [2023-02-28 00:03:01,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. 
+1: [2023-02-28 00:03:01,071] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 13 +0: [2023-02-28 00:03:01,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +6: [2023-02-28 00:03:01,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +0: [2023-02-28 00:03:01,072] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 0 +6: [2023-02-28 00:03:01,072] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 52 +6: [2023-02-28 00:03:01,072] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-02-28 00:03:01,072] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 49 +4: [2023-02-28 00:03:01,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-02-28 00:03:01,074] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 34 +5: [2023-02-28 00:03:01,074] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 44 +1: [2023-02-28 00:03:01,074] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 13 +0: [2023-02-28 00:03:01,075] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 0 +0: could not find arguments in the checkpoint ... 
+0: checkpoint version 3.0 +6: [2023-02-28 00:03:01,075] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 52 +6: [2023-02-28 00:03:01,076] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 49 +7: [2023-02-28 00:03:01,076] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-02-28 00:03:01,076] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 60 +4: [2023-02-28 00:03:01,077] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 34 +0: [2023-02-28 00:03:01,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-02-28 00:03:01,079] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 5 +7: [2023-02-28 00:03:01,079] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 60 +0: [2023-02-28 00:03:01,082] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 5 +4: [2023-02-28 00:03:01,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-02-28 00:03:01,088] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 37 +3: [2023-02-28 00:03:01,089] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. 
+3: [2023-02-28 00:03:01,089] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 28 +4: [2023-02-28 00:03:01,092] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 37 +3: [2023-02-28 00:03:01,092] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 28 +6: [2023-02-28 00:03:01,093] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-02-28 00:03:01,094] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 48 +0: [2023-02-28 00:03:01,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. +0: [2023-02-28 00:03:01,095] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 4 +6: [2023-02-28 00:03:01,097] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 48 +3: [2023-02-28 00:03:01,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-02-28 00:03:01,098] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 25 +6: [2023-02-28 00:03:01,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. 
+6: [2023-02-28 00:03:01,098] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 54 +0: [2023-02-28 00:03:01,098] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 4 +5: [2023-02-28 00:03:01,098] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-02-28 00:03:01,099] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 42 +7: [2023-02-28 00:03:01,099] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-02-28 00:03:01,100] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 58 +3: [2023-02-28 00:03:01,101] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 25 +6: [2023-02-28 00:03:01,101] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 54 +5: [2023-02-28 00:03:01,102] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 42 +7: [2023-02-28 00:03:01,103] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 58 +1: [2023-02-28 00:03:01,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-02-28 00:03:01,106] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 12 +2: [2023-02-28 00:03:01,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. 
+2: [2023-02-28 00:03:01,107] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 23 +1: [2023-02-28 00:03:01,109] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 12 +2: [2023-02-28 00:03:01,109] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 23 +5: [2023-02-28 00:03:01,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +5: [2023-02-28 00:03:01,110] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 47 +5: [2023-02-28 00:03:01,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-02-28 00:03:01,110] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 41 +2: [2023-02-28 00:03:01,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-02-28 00:03:01,111] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 21 +7: [2023-02-28 00:03:01,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-02-28 00:03:01,111] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 63 +3: [2023-02-28 00:03:01,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. 
+3: [2023-02-28 00:03:01,112] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 26 +2: [2023-02-28 00:03:01,114] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 21 +7: [2023-02-28 00:03:01,114] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 63 +0: [2023-02-28 00:03:01,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-02-28 00:03:01,115] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 6 +5: [2023-02-28 00:03:01,115] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 47 +5: [2023-02-28 00:03:01,115] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 41 +3: [2023-02-28 00:03:01,115] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 26 +0: [2023-02-28 00:03:01,119] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 6 +4: [2023-02-28 00:03:01,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-02-28 00:03:01,119] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 32 +4: [2023-02-28 00:03:01,123] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 32 +2: [2023-02-28 00:03:01,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. 
+2: [2023-02-28 00:03:01,124] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 22 +4: [2023-02-28 00:03:01,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-02-28 00:03:01,126] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 39 +7: [2023-02-28 00:03:01,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +2: [2023-02-28 00:03:01,127] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 22 +7: [2023-02-28 00:03:01,127] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 62 +4: [2023-02-28 00:03:01,129] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 39 +1: [2023-02-28 00:03:01,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +7: [2023-02-28 00:03:01,131] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 62 +1: [2023-02-28 00:03:01,131] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 10 +3: [2023-02-28 00:03:01,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. 
+3: [2023-02-28 00:03:01,131] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 24 +0: [2023-02-28 00:03:01,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +2: [2023-02-28 00:03:01,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +0: [2023-02-28 00:03:01,131] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 2 +2: [2023-02-28 00:03:01,131] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 16 +3: [2023-02-28 00:03:01,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-02-28 00:03:01,132] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 29 +1: [2023-02-28 00:03:01,133] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 10 +5: [2023-02-28 00:03:01,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. 
+5: [2023-02-28 00:03:01,134] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 46 +3: [2023-02-28 00:03:01,134] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 24 +2: [2023-02-28 00:03:01,135] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 16 +3: [2023-02-28 00:03:01,135] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 29 +0: [2023-02-28 00:03:01,136] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 2 +2: [2023-02-28 00:03:01,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-02-28 00:03:01,136] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 17 +5: [2023-02-28 00:03:01,138] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 46 +6: [2023-02-28 00:03:01,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-02-28 00:03:01,139] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 53 +2: [2023-02-28 00:03:01,140] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 17 +6: [2023-02-28 00:03:01,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. 
+6: [2023-02-28 00:03:01,141] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 50 +6: [2023-02-28 00:03:01,142] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 53 +7: [2023-02-28 00:03:01,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-02-28 00:03:01,143] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 61 +6: [2023-02-28 00:03:01,144] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 50 +7: [2023-02-28 00:03:01,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-02-28 00:03:01,145] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 56 +4: [2023-02-28 00:03:01,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-02-28 00:03:01,146] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 33 +7: [2023-02-28 00:03:01,147] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 61 +7: [2023-02-28 00:03:01,148] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 56 +7: [2023-02-28 00:03:01,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. 
+7: [2023-02-28 00:03:01,150] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 57 +4: [2023-02-28 00:03:01,150] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 33 +6: [2023-02-28 00:03:01,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-02-28 00:03:01,151] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 55 +1: [2023-02-28 00:03:01,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-02-28 00:03:01,151] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 14 +0: [2023-02-28 00:03:01,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-02-28 00:03:01,153] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 1 +7: [2023-02-28 00:03:01,153] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 57 +6: [2023-02-28 00:03:01,154] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 55 +1: [2023-02-28 00:03:01,156] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 14 +0: [2023-02-28 00:03:01,156] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 1 +5: [2023-02-28 00:03:01,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. 
+5: [2023-02-28 00:03:01,156] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 40 +5: [2023-02-28 00:03:01,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-02-28 00:03:01,157] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 45 +1: [2023-02-28 00:03:01,158] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-02-28 00:03:01,158] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 8 +5: [2023-02-28 00:03:01,160] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 45 +5: [2023-02-28 00:03:01,160] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 40 +1: [2023-02-28 00:03:01,161] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 8 +3: [2023-02-28 00:03:01,164] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-02-28 00:03:01,164] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 30 +4: [2023-02-28 00:03:01,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. 
+4: [2023-02-28 00:03:01,167] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 35 +3: [2023-02-28 00:03:01,167] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 30 +4: [2023-02-28 00:03:01,170] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 35 +2: [2023-02-28 00:03:01,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-02-28 00:03:01,178] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 20 +2: [2023-02-28 00:03:01,181] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 20 +5: [2023-02-28 00:03:01,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b9100m/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-02-28 00:03:01,194] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 43 +5: [2023-02-28 00:03:01,198] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 43 +0: successfully loaded checkpoint from checkpoints_421m3b9100m at iteration 0 +7: time (ms) | load-checkpoint: 3751.84 +0: estimated model parameters: 0.42120704 +0: estimated model parameters without embeddings: 0.35419648 +0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-02-28 00:03:01 +0: > building train, validation, and test datasets ... +0: > datasets target sizes (minimum size): +0: train: 1 +0: validation: 25600 +0: test: 25600 +0: > building train, validation, and test datasets for GPT ... +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... 
+0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.006320 seconds +0: number of documents: 208931 +0: > dataset split: +0: train: +0: document indices in [0, 208931) total of 208931 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_100M_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_100M_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_100M_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.004 seconds +0: total number of samples: 48805 +0: total number of epochs: 1 +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.044533 seconds +0: number of documents: 364608 +0: > dataset split: +0: validation: +0: document indices in [0, 364608) total of 364608 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.014 seconds +0: total number of samples: 84978 +0: total number of epochs: 1 +0: > finished creating GPT datasets ... +0: [after dataloaders are built] datetime: 2023-02-28 00:03:15 +0: done with setup ... +0: training ... 
+7: time (ms) | model-and-optimizer-setup: 20467.23 | train/valid/test-data-iterators-setup: 12771.97 +0: [after training is done] datetime: 2023-02-28 00:03:15 +7: ----------------------------------------------------------------------------------------------------------------- +7: validation loss at the end of training for val data | lm loss value: 3.898620E+00 | lm loss PPL: 4.933430E+01 | +7: ----------------------------------------------------------------------------------------------------------------- +END 3165626: Tue 28 Feb 2023 12:03:41 AM EET diff --git a/421m3b9100m/sbatch_421m3b9100m.sh b/421m3b9100m/sbatch_421m3b9100m.sh new file mode 100644 index 0000000000000000000000000000000000000000..0847447afa20c249c59a703062503dd0a589384a --- /dev/null +++ b/421m3b9100m/sbatch_421m3b9100m.sh @@ -0,0 +1,163 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=421m3b9100m + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train100m.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_1B5_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + 
+PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_425M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 3936562000 +# -> Samples: 1_922_149 +TRAIN_SAMPLES=1_922_149 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 19_221 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, 
+ "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/421m3b9100m/sbatch_421m3b9100mval.sh b/421m3b9100m/sbatch_421m3b9100mval.sh new file mode 100644 index 0000000000000000000000000000000000000000..0c92935c67c681946d370196d8b9173972cce3c6 --- /dev/null +++ b/421m3b9100m/sbatch_421m3b9100mval.sh @@ -0,0 +1,167 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=421m3b9100mval +VARIANT_CKPT=421m3b9100m + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT_CKPT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" 
+TRAIN_DATA_PATH=train100m.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_3B9_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_425M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 3936562000 +# -> Samples: 1_922_149 +TRAIN_SAMPLES=1 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 0 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + --no-load-optim \ + --reset-progress \ + --override-lr-scheduler \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1 \ + --eval-iters 100 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + 
--log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/421m3b9100m/tensorboard_421m3b9100m/events.out.tfevents.1677506679.nid006372.122550.0 b/421m3b9100m/tensorboard_421m3b9100m/events.out.tfevents.1677506679.nid006372.122550.0 new file mode 100644 index 0000000000000000000000000000000000000000..c191f95c7ad5588ef3c0d63366e792fffa5db89d --- /dev/null +++ b/421m3b9100m/tensorboard_421m3b9100m/events.out.tfevents.1677506679.nid006372.122550.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92a612835d44544743c7593b1e4a0958f7b388cfbb9b5259a8381b9e286beff9 +size 13359042 diff --git a/421m3b9100m/tensorboard_421m3b9100mval/events.out.tfevents.1677535339.nid006555.13901.0 b/421m3b9100m/tensorboard_421m3b9100mval/events.out.tfevents.1677535339.nid006555.13901.0 new file mode 100644 index 0000000000000000000000000000000000000000..c69583ecb0545f9e2f3a07707a32f4343cbbabe8 --- 
/dev/null +++ b/421m3b9100m/tensorboard_421m3b9100mval/events.out.tfevents.1677535339.nid006555.13901.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82a400e2e3d162c695970bb0274b866896278e862c178e03619b10921fb14764 +size 980 diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_0.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..dbacab4bc11685034ddc6fc5a97fca1bf8cfe653 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.23177272289200984, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.02232644342648684}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05527594145082781, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0012465231724252742}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2418587672030188, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004019327605144833}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08501748924707608, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016897110414027562}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.025961495442502317, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007780782480280117}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.11843347133692429, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0028540813294636257}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.040231183755865985, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010882095141455182}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.05395710508825529, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011929395949061676}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2379697178202554, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003961524066291127}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08314138498678003, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016231446043623204}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.05337500845318596, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011839667538989299}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.23441022719094787, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003815976303927829}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.08213023336769926, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015968743295787242}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_1.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..8c72f2654354a8c96f42fe1d42c0301feeeea981 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.15591930086532843, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": 
"en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.020043337299334937}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.04915696336088872, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013719245742553428}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.22817875412187882, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0038498391014422984}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07451602193826748, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001531724832572773}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.020531505897721725, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006614769676442327}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09594413074539906, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002613650288767959}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03199910420801712, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009604719879955616}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.047883171012056816, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001343005391086916}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.22184044926729388, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0037104935930818562}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07254382626050829, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001482218914183275}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.047546859924243255, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013419792425392316}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.22014694911717558, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036698611197668388}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07197825182983052, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014742649681246808}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_2.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..dfdaceedeceba4a2fea9d71c1c5542e9b77e0705 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.13871707140912973, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.016788036903946976}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.04461027579233427, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0010252458063135287}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.21811803941951569, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0037017929524820227}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.06987553531042545, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014539433830700974}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.018660136528242423, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006140753895605142}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.08755745192640091, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002430794457259159}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.02909294918208801, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008964441540124039}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.04358571012467286, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010061709766373032}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21211372669415113, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0035758418823002756}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.06823871063465026, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014261323213445975}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.0431816158725768, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010025065034385686}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.20966310389431556, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0035384273192345045}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.06756980792712407, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014205362992606334}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_3.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..f60774ee49cb3e8d89dfdf65554bc6bcf6f0ef93 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.11690247533544619, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.009287305842204673}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.046245688129935014, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0010696458495622631}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.22042220582796518, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0037371854746322845}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07202771447835704, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001490885359262395}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.019580176815454965, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006506841517107448}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09054710218942696, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0024953240626813225}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03035641719434565, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009321288238861425}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.04511844470650973, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010391655732875631}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21441315183041518, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0035987476288693045}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07027243705758501, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001452780815054275}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.044690873379109534, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010335538927214356}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.21226321516952545, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0035914422547391055}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.06958140828669782, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014430649224632791}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_4.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..c4199e5b817e9c0ec8b4660887bce3ce443bc57a --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.10677700841911389, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.010532753032062412}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.04615281662149837, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0010879113367318798}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.22148172798476662, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003759468700074842}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07157002920717628, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014616935888478546}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.019174418049251147, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006244871255048812}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09103456901778384, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0024825202804016214}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.029883470617163364, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009026590175097606}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.045119423861603725, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001073039312603156}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21556020554547964, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003632709619034944}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.06993498443202883, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014395169753379103}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04476216662558973, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010644849383889573}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.21355300560358506, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0035901059049123925}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.06934638240081208, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014247023478863169}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_5.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..9d5c275fdcfc6d6e7c0a4914831de23742b49aa7 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.15138575433897905, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.017900594645392207}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.04823668690787371, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011345767818553378}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.22292464568286927, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0038456993631155742}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07429447162765516, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015383260022193645}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.020106627977607386, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006604378076018168}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.0930666896736683, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0025812913573701997}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03125224633271726, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009564038941008088}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.046926018454489425, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011014019730810139}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21666597922428132, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0037251822118275196}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07230650733729013, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014968722133129424}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04661651332318032, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011024401738617675}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.21459919419928164, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036887425804951967}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07174191910647153, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001488758460774504}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_0.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..7891aad70dc861e2e874ca340d0997549b09ec0a --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.09447866279876174, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016120241044285232}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1540564186245438, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0023725318234165483}, {"task_name": 
"GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.10832056502796245, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016461665492261007}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.012978391395703715, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00048074981881528943}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.02301810460593164, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009956565819111785}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.015181811389840852, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005506979609486634}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.08193505277480057, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013007255437820148}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.13628710672412833, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0020475851989507207}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.09453921947632203, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013355023556588757}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.08784439043968462, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014790920660564867}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.14407726529605594, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022187007547342565}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.10089536393117866, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": 
"en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001511628829699065}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.7157135005698502, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04647014375603136}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_1.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..ec5f9247c32675f150f378b82dfbdd8bc95eb327 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.1119730515754374, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015546284623857009}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": 
"tldr_en", "rouge1_recall": 0.1809241968351015, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0023564572667088978}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.12776545479953536, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001583327649945923}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.012687266208996356, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00047683091570961515}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.02295689455252069, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009715194431889655}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.015034669686256276, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, 
"comment": "", "rouge2_fmeasure_stderr": 0.0005599076489425881}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.08248229363295397, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001025565708877038}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.13703985539484212, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0017570256829565357}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.09467857796328046, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0010413737833186599}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.10561979752507195, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014505402845061354}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.17142948921774814, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": 
"d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022322231249741783}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.12061788807935782, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014744781895989557}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.7568216796206494, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0365762458878659}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_2.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..cbd7cf765c169e96cf860c2ae5372c79f4e95b74 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 
0.10014548191146704, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014565924281840863}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1575653498735645, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002075553615634516}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.113136060241949, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014392617841219371}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.008598460740984758, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0003923280052930601}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.014622841033730277, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", 
"rouge2_recall_stderr": 0.0007520706503860998}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.009894560043561524, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00043909962900646094}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.07777296552153214, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010189852904572438}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.12593569249466863, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.001616788777290793}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.0885732558882455, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0010105020660668759}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.09456741339374493, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": 
"d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013485738233839097}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.14953299345084117, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.001945811896778034}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.1070061210085247, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013313074012279494}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.5008074434105538, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.027420163187722234}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git 
a/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_3.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..f29b86ae016a8cbcc87ff049ee9d6929280bf671 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.08833884249120096, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016673032171567173}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1315357579913616, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002286330317290917}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.09530805481842063, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015958221853691465}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.009063060451693672, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00045344650796241257}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.015727013632766268, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009142365063516984}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.010182474205209647, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0004898627779511664}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0721907541408395, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013116484262805982}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.10968615915706327, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019015907007798752}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.07807114760808846, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": 
null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0012320036077104104}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.08248440495112994, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00153250206702351}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.123214740487544, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0021215127721801582}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.08904837251695515, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014644806154194177}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.6451490842782704, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.04949835258471997}], "config": {"model": "hf-causal", "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_4.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..95707009e5ff9d9cd802ca8dc6f33bd113238bc8 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.03202423756442962, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014355653077500936}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.04617716025641568, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.001862897722705631}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.032415438282286, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0012577519098217766}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.004132309151704993, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0004806369034292581}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.006544256549277832, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.000674451226759297}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.004010615388886775, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00032253874673817534}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.027672839936926416, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012307971594838533}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.04061635263335589, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", 
"subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0016394282301373483}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.028103964819551033, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001065948161074736}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.029479508560887577, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0013229802106936625}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.04254618276718864, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0017078183730069745}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.029753940030187404, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0011391554285021324}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", 
"bleu": 0.19863349159696345, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.020082456809440215}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_5.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..ae4f60025ab823c9a14213eaf0985a879f86dca2 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.006559425974826442, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0007772707987241637}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.008162664783148844, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": 
true, "comment": "", "rouge1_recall_stderr": 0.0008557272559576417}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.005982548603636205, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006026463885272619}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0010078505625016065, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00020525860229414494}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0014689900879043029, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00034502461378481663}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0008789405383568104, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00014915160142531196}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.005718323694944019, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": 
"d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0006701572692651454}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.007339449594823124, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.000770907895860834}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.005275238912702371, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0005250606523599148}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.006186482491790637, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0007409724280389656}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.007662196121244796, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0008003804501317619}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 
0.005633569731566957, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005703046910121007}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 7.258601739337029e-07, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 1.0366520457308342e-06}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_0.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..217b4ef82d6bff18daffdd2a972decf2f72e1ed3 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 2.3401189092178583, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a 
restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.040276117196671284}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.1338482575359331, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0024648821332688554}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.19751057677787456, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0022572096596852336}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.14110806522366368, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00163799349633168}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.03247407533046338, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0009841360244194405}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.04905120516211981, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0010857431861598476}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.03462813772314848, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0007607431713226254}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.1242011591418883, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0020399638390954226}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.19323930452458596, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0022556438726476693}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.13585027913820621, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015569718092981529}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.10227961147329598, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.002007607729564016}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.14913014161326776, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0017880704560848586}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.10688711791196877, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013147429613898317}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_1.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..42a8e59273ecda4fec89bed3c15daba925dccd7e --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 3.5992517367535997, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12500914847960642}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.3614234345919415, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0034603741768298732}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.2669063847937934, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002337075988797385}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.28101229235208536, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020265969737045648}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.12030442505390465, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.003538445104828453}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.06897408855755997, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013139643398713688}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.07333475675979936, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013071931289660096}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.27329668658465245, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003315427145096111}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.19403178278886463, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0017664752356725145}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.20524138627600222, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001545962308793527}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.30454450005440276, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0034144218184777}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.21993558079129125, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002032200498111229}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.2322460841505594, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018029363292571669}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_2.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..a51c2c6cf5c7becb354a12ebec6d0467b5a14c9d --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 3.5942635908606877, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09147815934513456}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.3793434189953508, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004069614391187794}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.26746437140757334, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002526704673951825}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.279394643327864, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021951717114593317}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.1515456989078487, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.004472870355521199}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.07217770344112004, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013474768357982336}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.0768604575160761, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013107245087658804}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.29555872383394716, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004057889592592142}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.19576962242521914, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00187325384455503}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2058924032347511, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016251665479717697}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3245908498877826, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004095922097833713}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.22064922758817399, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0021668246347809153}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.23146409262761347, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019069704764335866}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_3.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..e26ba7011a9fe6690aa51749f9eb28bf56476385 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 3.8277549889238736, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11727775129407206}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.411306959214004, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004596205232308239}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.26249329655423426, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025813575811259462}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.2778774267600531, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002231862571930029}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.19162481139662949, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.005328850643116758}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.07577164851702119, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014194426398597818}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.08217996110518319, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013750056974164472}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.33043415470701937, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.004742365592495772}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.1937730893993055, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019119918138972675}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.20735400433932427, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016689881709039034}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.35986441072271874, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.004720685076235124}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.21855414102493695, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022133351382317944}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.23289659318871578, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019422284749949631}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_4.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d0125a19043f8e870880e116605287dce2e020c0 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 3.8730750001922387, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08862530091808378}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.43734665127896905, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.004915431766267602}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.25825014881139846, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026138643131703014}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.27642834239969777, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002243190375697101}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.22342493014598566, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.005855663857563032}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.0774777502850809, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014095351417513376}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.0853395429614827, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013678253333793263}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.35796641748717295, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0051466390298742296}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.19165634222104913, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019262622276203}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.20776551068527133, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016556654268637695}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.38458437667991324, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.005085579108635989}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.2141255477692916, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022188315932043135}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.23107954066521458, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001935297756579334}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_5.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..54a0e318ad49177867d56c4310838b2ba824ef03 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 3.871663587272024, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08213288901946655}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.463975157198053, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.005172688538225158}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.25222528921228415, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025995788226328095}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.2752761295397579, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022444379807100992}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.25251781213190033, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.006227887703719169}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.07903749837559827, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001389474253801609}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.08905416913450527, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013768518037889515}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.38595874277972353, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.005425372277596022}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.19012858098709634, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019398139592262148}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.2100839933880261, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016694463788081287}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.4122015013890839, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.005345816177605482}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.21127057478563274, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022217687345829993}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.23236587517475857, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019368568086863874}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_0.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..85dfe570b1e3b04a3fdbb5c4a7d03e7c2b7a95ac --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.10435313975011247, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015651354656744249}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2555439781765851, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003558285673738991}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.14603634865959045, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020841313366621607}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.017103812367996997, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006930620273294279}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.043341553909434255, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00176569772400685}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.024173373329113998, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009688116374499329}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.08772513822430761, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012080239215113645}, {"task_name": "gem_xsum", "prompt_name": 
"article_DOC_summary", "rougeL_recall": 0.21657452718044604, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002873568760674157}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.12308403161540907, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001625110608788102}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.08042448723073105, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012210878969545749}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.19946512972695213, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0029584275829818217}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.11295811701049689, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, 
"comment": "", "rougeLsum_fmeasure_stderr": 0.0016569363950514997}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.8723920335368474, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07351478331186045}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_1.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..6c696954fe6db7342cc2799e8b48aa44bc164bee --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08747592902444107, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014531287916684458}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.21644112261135764, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0033107140270396554}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.12299227773307847, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001958112277088821}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.01106556247219742, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006097089149891652}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.028166209498187943, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001533311064611577}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.015710142149030784, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008583704462574906}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07465145377891358, 
"fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011626174667832513}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.18628849900104955, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0027360293889638074}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.1051867608217552, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015744992567553885}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06827654871246044, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001131987134152434}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.17105163608110932, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026933609788779758}, 
{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09628432822719876, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015376419286776773}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.5964684071601108, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06838453086802421}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_2.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..9b6b7a9c6b574a909261e7fa8fb1c5101abc4638 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08572791024630257, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: 
{{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001384127706576123}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.21467210269736098, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003317038170555432}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.12092380897447323, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018851371517061449}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.011439172714049112, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006012082075144695}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.02989642087800327, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016215511857418903}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.016339899446017426, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, 
"subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008589336952150353}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.0752829050790085, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011596162782922079}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.18955113381130423, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002835961354831114}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.106349864837288, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015846051612748348}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06663478234734735, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001066868518646431}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 
0.16847089979874588, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026626534377023315}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09420537882087585, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014658748413253703}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.6089988048165847, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0603775435206676}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_3.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..088da3126654ab3eb42cc52f1e488d929fbb5154 --- /dev/null +++ 
b/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08725627518416645, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017540235333849527}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.2065502217091037, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0035051865393515855}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.11927879260429021, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0020977305010526533}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.012719951156236566, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007406965606955484}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.029787145304670665, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015368778585668954}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.017198804793613687, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008946401389735868}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07721191609461389, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014853368263540478}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.18418802513615165, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0030256632043210233}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.10577166580317919, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017689287079966291}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0686057369834113, 
"fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001422941372147247}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.16359614543666398, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002849671331740175}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09377983788010003, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016661080858413875}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.6352481576908428, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07228550314335824}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No 
newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_4.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..b62998cf0c16f1b0436ae847f04c6b21474dd58a --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.03048686866903372, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002057687267513022}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.05350300840117225, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003130762403545177}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.034344690937185784, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001989633638126816}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.003995899001837998, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: 
{{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00050499691399483}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.008145814980496137, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009371172275023301}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.004942647365129906, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005595762399260895}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.025984491055271917, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0016847394634243319}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.04659937629170288, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002710020078389942}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.02958035424455412, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, 
"subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016803867000653534}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.024293545373462375, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016361709915266752}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.04256754491806482, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0025166035120746206}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.027240573379215583, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015817488059235681}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.300928021815898, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.06925636358571334}], "config": {"model": "hf-causal", "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_5.json b/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..58addd2c3ed3dfe4ea307748ae12ea5e8685c110 --- /dev/null +++ b/421m3b91b5/evaluation/generation/agg.421m3b91b5_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.002805834500150757, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0007612204769420191}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.0024013325042936823, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0006357123052113102}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.002540260392770392, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006784884870699844}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0004745709032612473, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00017749034421704978}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.00037964313856597286, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0001367309522495532}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.0004167222426927057, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0001512636536345213}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.002003747091807142, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0005275557743060757}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0017802029206495263, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", 
"prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00046661766903965664}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0018478732641861921, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0004841531159026887}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.002248874633359708, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0006139815040464879}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.0019486166880306997, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0005194547723758253}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.002045339368848435, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005490084314042811}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 
2.8887196381450717e-38, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 8.880640660739498e-33}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_0.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7796deb6f282bf0f63be6b9255e8e26ef22a5eb9 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a7082d88dbcb640af710765e7f32155233b2408ad9440e28c9751bf75d88c0 +size 4153186 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_1.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4657162885205de227ea8b3ee5fa348c8e51eb0e --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064f5b417e300e0597e778fd632dfbc49ef901f97a1e2e2264977733e25a839f +size 5174567 diff --git 
a/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_2.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a25cea773ecc18517b5a4e422a8f2669d576b7e3 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9209cac9106454b6b0779f1f576d65d0dc69b57b2f4c2ef48d2df3108498ea9e +size 6077074 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_3.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..dcfe8502bb39d59b0c9e76fbbd0fa8529a9e901a --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6101ec0e89e879daf9b1862a5a720855fd5ef696ffe82406605bb5574ccb2d8e +size 6966652 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_4.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5ab8210ce6f0c9579e508d23980c0e7e207e685e --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70111606a4e0a0293295b16859d5c3f701f7798b9c4de8f9e7dbab765e985949 +size 7857615 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_5.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..40d44893493d2851e0d1d7dfffd073ea2bb2b40c --- /dev/null +++ 
b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-web_nlg_en_PALM_prompt_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aaea7b066135c90ff07dbfd3f6054098e3e3ca5a5732d2cca06140e59180515 +size 8723562 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_0.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..fd62e15acf6f6e7db2cc4e1cc5999a4d6e1ce396 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c60ac81081801a015913b64dc97782a37c6b379c028474b9809c72a56e9eee9 +size 7651213 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_1.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..1d3805d3da3d79b81004a4ea4731a459ef3077a4 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:333d7c0a425315b98230cbac21058e7311c85fa56c159237c885cc8404725073 +size 13338806 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_2.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..792c5c427a082f61003b90574d0d62451d42cefa --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c910124dcc9a2e1665ae46c09354fb47c0caa43b2905dcc93f1b8804b6b5f069 +size 18943921 diff --git 
a/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_3.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e6c8795d4d252ee0939b58e8d8e2a2795806de6c --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0791d5ac7f783d997212d0b60b323ac7360d882613f19b64574cc8bbe52ce4c +size 24351563 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_4.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d597638610cc572832056f3ed418b74ba4d8e48d --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b42b9cb4f6dee0a06fe17118ade31eeed48d3a3c56e8e0d0c08011d71595417 +size 29476518 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_5.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..4d9b2afe21a5bdcb0ae3aeabd56b9aca56179e25 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_GEM-wiki_lingua_en_tldr_en_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c1e7038f059f5c427fb80115a4735ee8cb51076e001ea54aa11e66e4f894b4f +size 34799737 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl new file mode 100644 index 
0000000000000000000000000000000000000000..f3402a7d818475fbd6d9f9f3375ec137966f2f5d --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:955dd74c9849120d81093ac803984891e88378cfc053b833c49a2d74d3dc14f1 +size 4253830 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3706d9454142adb4be82954b55856883c6ee5615 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18b52f2d3a58f0ff9de15a17636ecd665eda79a83688bc3a85e7c2b6a7578e7c +size 5026092 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..480109012a7eb959801540afed8925f9c9696316 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b61e7c60b6346ff01e129131d96410133ce4be161f7a22faba6e911b46039f5 +size 6106181 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..d7254fee130bfda5ce46fc65f6732fb0c8cee8e3 --- /dev/null +++ 
b/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f43682eed1e9e1cae7e69d9bc02d564ecee5e704dc9e815347a4321516e4222 +size 7172874 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0046215a5b68b55b56377d005cb8de3278627fe6 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dfbd39b7778bb424305d62fe68f62bd16436ffec41499f955f37852931e5d39 +size 8239035 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..ef26f4b80df83fe6b07c0751f73d6022e7ceebf0 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ef9c1a810901ab1d53c234997b8ff1c1846878487677c62a1b77d3107229a77 +size 9311688 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_0.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..edb3534adb16542d1647edaf6e973ea4bfb40264 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5cd7dd961f6751b8f2ad77abd890c555965c079d01a1a1679763390d64d63ea5 +size 2837716 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_1.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..65a9902206ba2cf7a2bf13577fcb568eeecf80ad --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca090eed98f5b7d04478d7e43d3fdfd8b94486680efe1cb989201bd680849898 +size 5109396 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_2.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..02723ff5bdbe83ffc74f6c5520f3269cc0357045 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741a8167fe6713a26d8ca5dbea178b71b9dd14978d09ce75f6c1fa11f92b2496 +size 7383930 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_3.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..518fda4791e4358f592c592506fa6f12ec31d409 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16419420e3f077c7ba6b1730769584b6eaf6a6eea138fc89f4da6edb9d07175a +size 9653296 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_4.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_4.jsonl new file mode 100644 
index 0000000000000000000000000000000000000000..dac11753c93bfbfc7ea72258096c225f48e42cc5 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1832a72c3d21a73af56d2782c8ba5d37bce1f52fb5228f1e968c02bc7989ab30 +size 11674458 diff --git a/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_5.jsonl b/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7aa97fc0f868c9d7342e8696bcd6ccc42b3cbca0 --- /dev/null +++ b/421m3b91b5/evaluation/generation/examples.421m3b91b5_gem_xsum_article_DOC_summary_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:400c134c8064b06882abe82e1481b2eb0191538b1134333cd7470b1adca75be4 +size 13897543 diff --git a/421m3b91b5/evaluation/generation/merged.csv b/421m3b91b5/evaluation/generation/merged.csv new file mode 100644 index 0000000000000000000000000000000000000000..a55dd7200de70966a5fbda81f33069c1e88eb1e5 --- /dev/null +++ b/421m3b91b5/evaluation/generation/merged.csv @@ -0,0 +1,53 @@ +dataset,fewshots,prompt,metric,value +e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.03462813772314848 +e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.03462813772314848 +e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.07333475675979936 +e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.07333475675979936 +e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.0768604575160761 +e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.0768604575160761 +e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.08217996110518319 +e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.08217996110518319 +e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.0853395429614827 +e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.0853395429614827 
+e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.08905416913450527 +e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.08905416913450527 +e2e_nlg_cleaned,5,average,multiple,0.07356617086669918 +gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.024173373329113998 +gem_xsum,0,median,rouge2_fmeasure,0.024173373329113998 +gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.015710142149030784 +gem_xsum,1,median,rouge2_fmeasure,0.015710142149030784 +gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.016339899446017426 +gem_xsum,2,median,rouge2_fmeasure,0.016339899446017426 +gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.017198804793613687 +gem_xsum,3,median,rouge2_fmeasure,0.017198804793613687 +gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.004942647365129906 +gem_xsum,4,median,rouge2_fmeasure,0.004942647365129906 +gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.0004167222426927057 +gem_xsum,5,median,rouge2_fmeasure,0.0004167222426927057 +gem_xsum,5,average,multiple,0.013130264887599752 +web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.040231183755865985 +web_nlg_en,0,median,rouge2_fmeasure,0.040231183755865985 +web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.03199910420801712 +web_nlg_en,1,median,rouge2_fmeasure,0.03199910420801712 +web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.02909294918208801 +web_nlg_en,2,median,rouge2_fmeasure,0.02909294918208801 +web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.03035641719434565 +web_nlg_en,3,median,rouge2_fmeasure,0.03035641719434565 +web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.029883470617163364 +web_nlg_en,4,median,rouge2_fmeasure,0.029883470617163364 +web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.03125224633271726 +web_nlg_en,5,median,rouge2_fmeasure,0.03125224633271726 +web_nlg_en,5,average,multiple,0.0321358952150329 +wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.015181811389840852 +wiki_lingua_en,0,median,rouge2_fmeasure,0.015181811389840852 +wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.015034669686256276 
+wiki_lingua_en,1,median,rouge2_fmeasure,0.015034669686256276 +wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.009894560043561524 +wiki_lingua_en,2,median,rouge2_fmeasure,0.009894560043561524 +wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.010182474205209647 +wiki_lingua_en,3,median,rouge2_fmeasure,0.010182474205209647 +wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.004010615388886775 +wiki_lingua_en,4,median,rouge2_fmeasure,0.004010615388886775 +wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0008789405383568104 +wiki_lingua_en,5,median,rouge2_fmeasure,0.0008789405383568104 +wiki_lingua_en,5,average,multiple,0.009197178542018647 diff --git a/421m3b91b5/evaluation/generation/merged.json b/421m3b91b5/evaluation/generation/merged.json new file mode 100644 index 0000000000000000000000000000000000000000..d3cf8b562d934f84b1d01072f18530eef2a07448 --- /dev/null +++ b/421m3b91b5/evaluation/generation/merged.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.23177272289200984, "bleu_stderr": 0.02232644342648684, "rouge1_fmeasure": 0.08501748924707608, "rouge1_fmeasure_stderr": 0.0016897110414027562, "rouge1_precision": 0.05527594145082781, "rouge1_precision_stderr": 0.0012465231724252742, "rouge1_recall": 0.2418587672030188, "rouge1_recall_stderr": 0.004019327605144833, "rouge2_fmeasure": 0.040231183755865985, "rouge2_fmeasure_stderr": 0.0010882095141455182, "rouge2_precision": 0.025961495442502317, "rouge2_precision_stderr": 0.0007780782480280117, "rouge2_recall": 0.11843347133692429, "rouge2_recall_stderr": 0.0028540813294636257, "rougeL_fmeasure": 0.08314138498678003, "rougeL_fmeasure_stderr": 0.0016231446043623204, "rougeL_precision": 0.05395710508825529, "rougeL_precision_stderr": 0.0011929395949061676, "rougeL_recall": 0.2379697178202554, "rougeL_recall_stderr": 0.003961524066291127, "rougeLsum_fmeasure": 0.08213023336769926, "rougeLsum_fmeasure_stderr": 0.0015968743295787242, "rougeLsum_precision": 0.05337500845318596, "rougeLsum_precision_stderr": 
0.0011839667538989299, "rougeLsum_recall": 0.23441022719094787, "rougeLsum_recall_stderr": 0.003815976303927829}}, "1": {"PALM_prompt": {"bleu": 0.15591930086532843, "bleu_stderr": 0.020043337299334937, "rouge1_fmeasure": 0.07451602193826748, "rouge1_fmeasure_stderr": 0.001531724832572773, "rouge1_precision": 0.04915696336088872, "rouge1_precision_stderr": 0.0013719245742553428, "rouge1_recall": 0.22817875412187882, "rouge1_recall_stderr": 0.0038498391014422984, "rouge2_fmeasure": 0.03199910420801712, "rouge2_fmeasure_stderr": 0.0009604719879955616, "rouge2_precision": 0.020531505897721725, "rouge2_precision_stderr": 0.0006614769676442327, "rouge2_recall": 0.09594413074539906, "rouge2_recall_stderr": 0.002613650288767959, "rougeL_fmeasure": 0.07254382626050829, "rougeL_fmeasure_stderr": 0.001482218914183275, "rougeL_precision": 0.047883171012056816, "rougeL_precision_stderr": 0.001343005391086916, "rougeL_recall": 0.22184044926729388, "rougeL_recall_stderr": 0.0037104935930818562, "rougeLsum_fmeasure": 0.07197825182983052, "rougeLsum_fmeasure_stderr": 0.0014742649681246808, "rougeLsum_precision": 0.047546859924243255, "rougeLsum_precision_stderr": 0.0013419792425392316, "rougeLsum_recall": 0.22014694911717558, "rougeLsum_recall_stderr": 0.0036698611197668388}}, "2": {"PALM_prompt": {"bleu": 0.13871707140912973, "bleu_stderr": 0.016788036903946976, "rouge1_fmeasure": 0.06987553531042545, "rouge1_fmeasure_stderr": 0.0014539433830700974, "rouge1_precision": 0.04461027579233427, "rouge1_precision_stderr": 0.0010252458063135287, "rouge1_recall": 0.21811803941951569, "rouge1_recall_stderr": 0.0037017929524820227, "rouge2_fmeasure": 0.02909294918208801, "rouge2_fmeasure_stderr": 0.0008964441540124039, "rouge2_precision": 0.018660136528242423, "rouge2_precision_stderr": 0.0006140753895605142, "rouge2_recall": 0.08755745192640091, "rouge2_recall_stderr": 0.002430794457259159, "rougeL_fmeasure": 0.06823871063465026, "rougeL_fmeasure_stderr": 0.0014261323213445975, 
"rougeL_precision": 0.04358571012467286, "rougeL_precision_stderr": 0.0010061709766373032, "rougeL_recall": 0.21211372669415113, "rougeL_recall_stderr": 0.0035758418823002756, "rougeLsum_fmeasure": 0.06756980792712407, "rougeLsum_fmeasure_stderr": 0.0014205362992606334, "rougeLsum_precision": 0.0431816158725768, "rougeLsum_precision_stderr": 0.0010025065034385686, "rougeLsum_recall": 0.20966310389431556, "rougeLsum_recall_stderr": 0.0035384273192345045}}, "3": {"PALM_prompt": {"bleu": 0.11690247533544619, "bleu_stderr": 0.009287305842204673, "rouge1_fmeasure": 0.07202771447835704, "rouge1_fmeasure_stderr": 0.001490885359262395, "rouge1_precision": 0.046245688129935014, "rouge1_precision_stderr": 0.0010696458495622631, "rouge1_recall": 0.22042220582796518, "rouge1_recall_stderr": 0.0037371854746322845, "rouge2_fmeasure": 0.03035641719434565, "rouge2_fmeasure_stderr": 0.0009321288238861425, "rouge2_precision": 0.019580176815454965, "rouge2_precision_stderr": 0.0006506841517107448, "rouge2_recall": 0.09054710218942696, "rouge2_recall_stderr": 0.0024953240626813225, "rougeL_fmeasure": 0.07027243705758501, "rougeL_fmeasure_stderr": 0.001452780815054275, "rougeL_precision": 0.04511844470650973, "rougeL_precision_stderr": 0.0010391655732875631, "rougeL_recall": 0.21441315183041518, "rougeL_recall_stderr": 0.0035987476288693045, "rougeLsum_fmeasure": 0.06958140828669782, "rougeLsum_fmeasure_stderr": 0.0014430649224632791, "rougeLsum_precision": 0.044690873379109534, "rougeLsum_precision_stderr": 0.0010335538927214356, "rougeLsum_recall": 0.21226321516952545, "rougeLsum_recall_stderr": 0.0035914422547391055}}, "4": {"PALM_prompt": {"bleu": 0.10677700841911389, "bleu_stderr": 0.010532753032062412, "rouge1_fmeasure": 0.07157002920717628, "rouge1_fmeasure_stderr": 0.0014616935888478546, "rouge1_precision": 0.04615281662149837, "rouge1_precision_stderr": 0.0010879113367318798, "rouge1_recall": 0.22148172798476662, "rouge1_recall_stderr": 0.003759468700074842, "rouge2_fmeasure": 
0.029883470617163364, "rouge2_fmeasure_stderr": 0.0009026590175097606, "rouge2_precision": 0.019174418049251147, "rouge2_precision_stderr": 0.0006244871255048812, "rouge2_recall": 0.09103456901778384, "rouge2_recall_stderr": 0.0024825202804016214, "rougeL_fmeasure": 0.06993498443202883, "rougeL_fmeasure_stderr": 0.0014395169753379103, "rougeL_precision": 0.045119423861603725, "rougeL_precision_stderr": 0.001073039312603156, "rougeL_recall": 0.21556020554547964, "rougeL_recall_stderr": 0.003632709619034944, "rougeLsum_fmeasure": 0.06934638240081208, "rougeLsum_fmeasure_stderr": 0.0014247023478863169, "rougeLsum_precision": 0.04476216662558973, "rougeLsum_precision_stderr": 0.0010644849383889573, "rougeLsum_recall": 0.21355300560358506, "rougeLsum_recall_stderr": 0.0035901059049123925}}, "5": {"PALM_prompt": {"bleu": 0.15138575433897905, "bleu_stderr": 0.017900594645392207, "rouge1_fmeasure": 0.07429447162765516, "rouge1_fmeasure_stderr": 0.0015383260022193645, "rouge1_precision": 0.04823668690787371, "rouge1_precision_stderr": 0.0011345767818553378, "rouge1_recall": 0.22292464568286927, "rouge1_recall_stderr": 0.0038456993631155742, "rouge2_fmeasure": 0.03125224633271726, "rouge2_fmeasure_stderr": 0.0009564038941008088, "rouge2_precision": 0.020106627977607386, "rouge2_precision_stderr": 0.0006604378076018168, "rouge2_recall": 0.0930666896736683, "rouge2_recall_stderr": 0.0025812913573701997, "rougeL_fmeasure": 0.07230650733729013, "rougeL_fmeasure_stderr": 0.0014968722133129424, "rougeL_precision": 0.046926018454489425, "rougeL_precision_stderr": 0.0011014019730810139, "rougeL_recall": 0.21666597922428132, "rougeL_recall_stderr": 0.0037251822118275196, "rougeLsum_fmeasure": 0.07174191910647153, "rougeLsum_fmeasure_stderr": 0.001488758460774504, "rougeLsum_precision": 0.04661651332318032, "rougeLsum_precision_stderr": 0.0011024401738617675, "rougeLsum_recall": 0.21459919419928164, "rougeLsum_recall_stderr": 0.0036887425804951967}}}, "GEM/wiki_lingua_en": {"0": 
{"tldr_en": {"bleu": 0.7157135005698502, "bleu_stderr": 0.04647014375603136, "rouge1_fmeasure": 0.10832056502796245, "rouge1_fmeasure_stderr": 0.0016461665492261007, "rouge1_precision": 0.09447866279876174, "rouge1_precision_stderr": 0.0016120241044285232, "rouge1_recall": 0.1540564186245438, "rouge1_recall_stderr": 0.0023725318234165483, "rouge2_fmeasure": 0.015181811389840852, "rouge2_fmeasure_stderr": 0.0005506979609486634, "rouge2_precision": 0.012978391395703715, "rouge2_precision_stderr": 0.00048074981881528943, "rouge2_recall": 0.02301810460593164, "rouge2_recall_stderr": 0.0009956565819111785, "rougeL_fmeasure": 0.09453921947632203, "rougeL_fmeasure_stderr": 0.0013355023556588757, "rougeL_precision": 0.08193505277480057, "rougeL_precision_stderr": 0.0013007255437820148, "rougeL_recall": 0.13628710672412833, "rougeL_recall_stderr": 0.0020475851989507207, "rougeLsum_fmeasure": 0.10089536393117866, "rougeLsum_fmeasure_stderr": 0.001511628829699065, "rougeLsum_precision": 0.08784439043968462, "rougeLsum_precision_stderr": 0.0014790920660564867, "rougeLsum_recall": 0.14407726529605594, "rougeLsum_recall_stderr": 0.0022187007547342565}}, "1": {"tldr_en": {"bleu": 0.7568216796206494, "bleu_stderr": 0.0365762458878659, "rouge1_fmeasure": 0.12776545479953536, "rouge1_fmeasure_stderr": 0.001583327649945923, "rouge1_precision": 0.1119730515754374, "rouge1_precision_stderr": 0.0015546284623857009, "rouge1_recall": 0.1809241968351015, "rouge1_recall_stderr": 0.0023564572667088978, "rouge2_fmeasure": 0.015034669686256276, "rouge2_fmeasure_stderr": 0.0005599076489425881, "rouge2_precision": 0.012687266208996356, "rouge2_precision_stderr": 0.00047683091570961515, "rouge2_recall": 0.02295689455252069, "rouge2_recall_stderr": 0.0009715194431889655, "rougeL_fmeasure": 0.09467857796328046, "rougeL_fmeasure_stderr": 0.0010413737833186599, "rougeL_precision": 0.08248229363295397, "rougeL_precision_stderr": 0.001025565708877038, "rougeL_recall": 0.13703985539484212, 
"rougeL_recall_stderr": 0.0017570256829565357, "rougeLsum_fmeasure": 0.12061788807935782, "rougeLsum_fmeasure_stderr": 0.0014744781895989557, "rougeLsum_precision": 0.10561979752507195, "rougeLsum_precision_stderr": 0.0014505402845061354, "rougeLsum_recall": 0.17142948921774814, "rougeLsum_recall_stderr": 0.0022322231249741783}}, "2": {"tldr_en": {"bleu": 0.5008074434105538, "bleu_stderr": 0.027420163187722234, "rouge1_fmeasure": 0.113136060241949, "rouge1_fmeasure_stderr": 0.0014392617841219371, "rouge1_precision": 0.10014548191146704, "rouge1_precision_stderr": 0.0014565924281840863, "rouge1_recall": 0.1575653498735645, "rouge1_recall_stderr": 0.002075553615634516, "rouge2_fmeasure": 0.009894560043561524, "rouge2_fmeasure_stderr": 0.00043909962900646094, "rouge2_precision": 0.008598460740984758, "rouge2_precision_stderr": 0.0003923280052930601, "rouge2_recall": 0.014622841033730277, "rouge2_recall_stderr": 0.0007520706503860998, "rougeL_fmeasure": 0.0885732558882455, "rougeL_fmeasure_stderr": 0.0010105020660668759, "rougeL_precision": 0.07777296552153214, "rougeL_precision_stderr": 0.0010189852904572438, "rougeL_recall": 0.12593569249466863, "rougeL_recall_stderr": 0.001616788777290793, "rougeLsum_fmeasure": 0.1070061210085247, "rougeLsum_fmeasure_stderr": 0.0013313074012279494, "rougeLsum_precision": 0.09456741339374493, "rougeLsum_precision_stderr": 0.0013485738233839097, "rougeLsum_recall": 0.14953299345084117, "rougeLsum_recall_stderr": 0.001945811896778034}}, "3": {"tldr_en": {"bleu": 0.6451490842782704, "bleu_stderr": 0.04949835258471997, "rouge1_fmeasure": 0.09530805481842063, "rouge1_fmeasure_stderr": 0.0015958221853691465, "rouge1_precision": 0.08833884249120096, "rouge1_precision_stderr": 0.0016673032171567173, "rouge1_recall": 0.1315357579913616, "rouge1_recall_stderr": 0.002286330317290917, "rouge2_fmeasure": 0.010182474205209647, "rouge2_fmeasure_stderr": 0.0004898627779511664, "rouge2_precision": 0.009063060451693672, "rouge2_precision_stderr": 
0.00045344650796241257, "rouge2_recall": 0.015727013632766268, "rouge2_recall_stderr": 0.0009142365063516984, "rougeL_fmeasure": 0.07807114760808846, "rougeL_fmeasure_stderr": 0.0012320036077104104, "rougeL_precision": 0.0721907541408395, "rougeL_precision_stderr": 0.0013116484262805982, "rougeL_recall": 0.10968615915706327, "rougeL_recall_stderr": 0.0019015907007798752, "rougeLsum_fmeasure": 0.08904837251695515, "rougeLsum_fmeasure_stderr": 0.0014644806154194177, "rougeLsum_precision": 0.08248440495112994, "rougeLsum_precision_stderr": 0.00153250206702351, "rougeLsum_recall": 0.123214740487544, "rougeLsum_recall_stderr": 0.0021215127721801582}}, "4": {"tldr_en": {"bleu": 0.19863349159696345, "bleu_stderr": 0.020082456809440215, "rouge1_fmeasure": 0.032415438282286, "rouge1_fmeasure_stderr": 0.0012577519098217766, "rouge1_precision": 0.03202423756442962, "rouge1_precision_stderr": 0.0014355653077500936, "rouge1_recall": 0.04617716025641568, "rouge1_recall_stderr": 0.001862897722705631, "rouge2_fmeasure": 0.004010615388886775, "rouge2_fmeasure_stderr": 0.00032253874673817534, "rouge2_precision": 0.004132309151704993, "rouge2_precision_stderr": 0.0004806369034292581, "rouge2_recall": 0.006544256549277832, "rouge2_recall_stderr": 0.000674451226759297, "rougeL_fmeasure": 0.028103964819551033, "rougeL_fmeasure_stderr": 0.001065948161074736, "rougeL_precision": 0.027672839936926416, "rougeL_precision_stderr": 0.0012307971594838533, "rougeL_recall": 0.04061635263335589, "rougeL_recall_stderr": 0.0016394282301373483, "rougeLsum_fmeasure": 0.029753940030187404, "rougeLsum_fmeasure_stderr": 0.0011391554285021324, "rougeLsum_precision": 0.029479508560887577, "rougeLsum_precision_stderr": 0.0013229802106936625, "rougeLsum_recall": 0.04254618276718864, "rougeLsum_recall_stderr": 0.0017078183730069745}}, "5": {"tldr_en": {"bleu": 7.258601739337029e-07, "bleu_stderr": 1.0366520457308342e-06, "rouge1_fmeasure": 0.005982548603636205, "rouge1_fmeasure_stderr": 0.0006026463885272619, 
"rouge1_precision": 0.006559425974826442, "rouge1_precision_stderr": 0.0007772707987241637, "rouge1_recall": 0.008162664783148844, "rouge1_recall_stderr": 0.0008557272559576417, "rouge2_fmeasure": 0.0008789405383568104, "rouge2_fmeasure_stderr": 0.00014915160142531196, "rouge2_precision": 0.0010078505625016065, "rouge2_precision_stderr": 0.00020525860229414494, "rouge2_recall": 0.0014689900879043029, "rouge2_recall_stderr": 0.00034502461378481663, "rougeL_fmeasure": 0.005275238912702371, "rougeL_fmeasure_stderr": 0.0005250606523599148, "rougeL_precision": 0.005718323694944019, "rougeL_precision_stderr": 0.0006701572692651454, "rougeL_recall": 0.007339449594823124, "rougeL_recall_stderr": 0.000770907895860834, "rougeLsum_fmeasure": 0.005633569731566957, "rougeLsum_fmeasure_stderr": 0.0005703046910121007, "rougeLsum_precision": 0.006186482491790637, "rougeLsum_precision_stderr": 0.0007409724280389656, "rougeLsum_recall": 0.007662196121244796, "rougeLsum_recall_stderr": 0.0008003804501317619}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 2.3401189092178583, "bleu_stderr": 0.040276117196671284, "rouge1_fmeasure": 0.14110806522366368, "rouge1_fmeasure_stderr": 0.00163799349633168, "rouge1_precision": 0.1338482575359331, "rouge1_precision_stderr": 0.0024648821332688554, "rouge1_recall": 0.19751057677787456, "rouge1_recall_stderr": 0.0022572096596852336, "rouge2_fmeasure": 0.03462813772314848, "rouge2_fmeasure_stderr": 0.0007607431713226254, "rouge2_precision": 0.03247407533046338, "rouge2_precision_stderr": 0.0009841360244194405, "rouge2_recall": 0.04905120516211981, "rouge2_recall_stderr": 0.0010857431861598476, "rougeL_fmeasure": 0.13585027913820621, "rougeL_fmeasure_stderr": 0.0015569718092981529, "rougeL_precision": 0.1242011591418883, "rougeL_precision_stderr": 0.0020399638390954226, "rougeL_recall": 0.19323930452458596, "rougeL_recall_stderr": 0.0022556438726476693, "rougeLsum_fmeasure": 0.10688711791196877, "rougeLsum_fmeasure_stderr": 
0.0013147429613898317, "rougeLsum_precision": 0.10227961147329598, "rougeLsum_precision_stderr": 0.002007607729564016, "rougeLsum_recall": 0.14913014161326776, "rougeLsum_recall_stderr": 0.0017880704560848586}}, "1": {"generate_text_restaurant": {"bleu": 3.5992517367535997, "bleu_stderr": 0.12500914847960642, "rouge1_fmeasure": 0.28101229235208536, "rouge1_fmeasure_stderr": 0.0020265969737045648, "rouge1_precision": 0.3614234345919415, "rouge1_precision_stderr": 0.0034603741768298732, "rouge1_recall": 0.2669063847937934, "rouge1_recall_stderr": 0.002337075988797385, "rouge2_fmeasure": 0.07333475675979936, "rouge2_fmeasure_stderr": 0.0013071931289660096, "rouge2_precision": 0.12030442505390465, "rouge2_precision_stderr": 0.003538445104828453, "rouge2_recall": 0.06897408855755997, "rouge2_recall_stderr": 0.0013139643398713688, "rougeL_fmeasure": 0.20524138627600222, "rougeL_fmeasure_stderr": 0.001545962308793527, "rougeL_precision": 0.27329668658465245, "rougeL_precision_stderr": 0.003315427145096111, "rougeL_recall": 0.19403178278886463, "rougeL_recall_stderr": 0.0017664752356725145, "rougeLsum_fmeasure": 0.2322460841505594, "rougeLsum_fmeasure_stderr": 0.0018029363292571669, "rougeLsum_precision": 0.30454450005440276, "rougeLsum_precision_stderr": 0.0034144218184777, "rougeLsum_recall": 0.21993558079129125, "rougeLsum_recall_stderr": 0.002032200498111229}}, "2": {"generate_text_restaurant": {"bleu": 3.5942635908606877, "bleu_stderr": 0.09147815934513456, "rouge1_fmeasure": 0.279394643327864, "rouge1_fmeasure_stderr": 0.0021951717114593317, "rouge1_precision": 0.3793434189953508, "rouge1_precision_stderr": 0.004069614391187794, "rouge1_recall": 0.26746437140757334, "rouge1_recall_stderr": 0.002526704673951825, "rouge2_fmeasure": 0.0768604575160761, "rouge2_fmeasure_stderr": 0.0013107245087658804, "rouge2_precision": 0.1515456989078487, "rouge2_precision_stderr": 0.004472870355521199, "rouge2_recall": 0.07217770344112004, "rouge2_recall_stderr": 
0.0013474768357982336, "rougeL_fmeasure": 0.2058924032347511, "rougeL_fmeasure_stderr": 0.0016251665479717697, "rougeL_precision": 0.29555872383394716, "rougeL_precision_stderr": 0.004057889592592142, "rougeL_recall": 0.19576962242521914, "rougeL_recall_stderr": 0.00187325384455503, "rougeLsum_fmeasure": 0.23146409262761347, "rougeLsum_fmeasure_stderr": 0.0019069704764335866, "rougeLsum_precision": 0.3245908498877826, "rougeLsum_precision_stderr": 0.004095922097833713, "rougeLsum_recall": 0.22064922758817399, "rougeLsum_recall_stderr": 0.0021668246347809153}}, "3": {"generate_text_restaurant": {"bleu": 3.8277549889238736, "bleu_stderr": 0.11727775129407206, "rouge1_fmeasure": 0.2778774267600531, "rouge1_fmeasure_stderr": 0.002231862571930029, "rouge1_precision": 0.411306959214004, "rouge1_precision_stderr": 0.004596205232308239, "rouge1_recall": 0.26249329655423426, "rouge1_recall_stderr": 0.0025813575811259462, "rouge2_fmeasure": 0.08217996110518319, "rouge2_fmeasure_stderr": 0.0013750056974164472, "rouge2_precision": 0.19162481139662949, "rouge2_precision_stderr": 0.005328850643116758, "rouge2_recall": 0.07577164851702119, "rouge2_recall_stderr": 0.0014194426398597818, "rougeL_fmeasure": 0.20735400433932427, "rougeL_fmeasure_stderr": 0.0016689881709039034, "rougeL_precision": 0.33043415470701937, "rougeL_precision_stderr": 0.004742365592495772, "rougeL_recall": 0.1937730893993055, "rougeL_recall_stderr": 0.0019119918138972675, "rougeLsum_fmeasure": 0.23289659318871578, "rougeLsum_fmeasure_stderr": 0.0019422284749949631, "rougeLsum_precision": 0.35986441072271874, "rougeLsum_precision_stderr": 0.004720685076235124, "rougeLsum_recall": 0.21855414102493695, "rougeLsum_recall_stderr": 0.0022133351382317944}}, "4": {"generate_text_restaurant": {"bleu": 3.8730750001922387, "bleu_stderr": 0.08862530091808378, "rouge1_fmeasure": 0.27642834239969777, "rouge1_fmeasure_stderr": 0.002243190375697101, "rouge1_precision": 0.43734665127896905, "rouge1_precision_stderr": 
0.004915431766267602, "rouge1_recall": 0.25825014881139846, "rouge1_recall_stderr": 0.0026138643131703014, "rouge2_fmeasure": 0.0853395429614827, "rouge2_fmeasure_stderr": 0.0013678253333793263, "rouge2_precision": 0.22342493014598566, "rouge2_precision_stderr": 0.005855663857563032, "rouge2_recall": 0.0774777502850809, "rouge2_recall_stderr": 0.0014095351417513376, "rougeL_fmeasure": 0.20776551068527133, "rougeL_fmeasure_stderr": 0.0016556654268637695, "rougeL_precision": 0.35796641748717295, "rougeL_precision_stderr": 0.0051466390298742296, "rougeL_recall": 0.19165634222104913, "rougeL_recall_stderr": 0.0019262622276203, "rougeLsum_fmeasure": 0.23107954066521458, "rougeLsum_fmeasure_stderr": 0.001935297756579334, "rougeLsum_precision": 0.38458437667991324, "rougeLsum_precision_stderr": 0.005085579108635989, "rougeLsum_recall": 0.2141255477692916, "rougeLsum_recall_stderr": 0.0022188315932043135}}, "5": {"generate_text_restaurant": {"bleu": 3.871663587272024, "bleu_stderr": 0.08213288901946655, "rouge1_fmeasure": 0.2752761295397579, "rouge1_fmeasure_stderr": 0.0022444379807100992, "rouge1_precision": 0.463975157198053, "rouge1_precision_stderr": 0.005172688538225158, "rouge1_recall": 0.25222528921228415, "rouge1_recall_stderr": 0.0025995788226328095, "rouge2_fmeasure": 0.08905416913450527, "rouge2_fmeasure_stderr": 0.0013768518037889515, "rouge2_precision": 0.25251781213190033, "rouge2_precision_stderr": 0.006227887703719169, "rouge2_recall": 0.07903749837559827, "rouge2_recall_stderr": 0.001389474253801609, "rougeL_fmeasure": 0.2100839933880261, "rougeL_fmeasure_stderr": 0.0016694463788081287, "rougeL_precision": 0.38595874277972353, "rougeL_precision_stderr": 0.005425372277596022, "rougeL_recall": 0.19012858098709634, "rougeL_recall_stderr": 0.0019398139592262148, "rougeLsum_fmeasure": 0.23236587517475857, "rougeLsum_fmeasure_stderr": 0.0019368568086863874, "rougeLsum_precision": 0.4122015013890839, "rougeLsum_precision_stderr": 0.005345816177605482, 
"rougeLsum_recall": 0.21127057478563274, "rougeLsum_recall_stderr": 0.0022217687345829993}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 0.8723920335368474, "bleu_stderr": 0.07351478331186045, "rouge1_fmeasure": 0.14603634865959045, "rouge1_fmeasure_stderr": 0.0020841313366621607, "rouge1_precision": 0.10435313975011247, "rouge1_precision_stderr": 0.0015651354656744249, "rouge1_recall": 0.2555439781765851, "rouge1_recall_stderr": 0.003558285673738991, "rouge2_fmeasure": 0.024173373329113998, "rouge2_fmeasure_stderr": 0.0009688116374499329, "rouge2_precision": 0.017103812367996997, "rouge2_precision_stderr": 0.0006930620273294279, "rouge2_recall": 0.043341553909434255, "rouge2_recall_stderr": 0.00176569772400685, "rougeL_fmeasure": 0.12308403161540907, "rougeL_fmeasure_stderr": 0.001625110608788102, "rougeL_precision": 0.08772513822430761, "rougeL_precision_stderr": 0.0012080239215113645, "rougeL_recall": 0.21657452718044604, "rougeL_recall_stderr": 0.002873568760674157, "rougeLsum_fmeasure": 0.11295811701049689, "rougeLsum_fmeasure_stderr": 0.0016569363950514997, "rougeLsum_precision": 0.08042448723073105, "rougeLsum_precision_stderr": 0.0012210878969545749, "rougeLsum_recall": 0.19946512972695213, "rougeLsum_recall_stderr": 0.0029584275829818217}}, "1": {"article_DOC_summary": {"bleu": 0.5964684071601108, "bleu_stderr": 0.06838453086802421, "rouge1_fmeasure": 0.12299227773307847, "rouge1_fmeasure_stderr": 0.001958112277088821, "rouge1_precision": 0.08747592902444107, "rouge1_precision_stderr": 0.0014531287916684458, "rouge1_recall": 0.21644112261135764, "rouge1_recall_stderr": 0.0033107140270396554, "rouge2_fmeasure": 0.015710142149030784, "rouge2_fmeasure_stderr": 0.0008583704462574906, "rouge2_precision": 0.01106556247219742, "rouge2_precision_stderr": 0.0006097089149891652, "rouge2_recall": 0.028166209498187943, "rouge2_recall_stderr": 0.001533311064611577, "rougeL_fmeasure": 0.1051867608217552, "rougeL_fmeasure_stderr": 0.0015744992567553885, 
"rougeL_precision": 0.07465145377891358, "rougeL_precision_stderr": 0.0011626174667832513, "rougeL_recall": 0.18628849900104955, "rougeL_recall_stderr": 0.0027360293889638074, "rougeLsum_fmeasure": 0.09628432822719876, "rougeLsum_fmeasure_stderr": 0.0015376419286776773, "rougeLsum_precision": 0.06827654871246044, "rougeLsum_precision_stderr": 0.001131987134152434, "rougeLsum_recall": 0.17105163608110932, "rougeLsum_recall_stderr": 0.0026933609788779758}}, "2": {"article_DOC_summary": {"bleu": 0.6089988048165847, "bleu_stderr": 0.0603775435206676, "rouge1_fmeasure": 0.12092380897447323, "rouge1_fmeasure_stderr": 0.0018851371517061449, "rouge1_precision": 0.08572791024630257, "rouge1_precision_stderr": 0.001384127706576123, "rouge1_recall": 0.21467210269736098, "rouge1_recall_stderr": 0.003317038170555432, "rouge2_fmeasure": 0.016339899446017426, "rouge2_fmeasure_stderr": 0.0008589336952150353, "rouge2_precision": 0.011439172714049112, "rouge2_precision_stderr": 0.0006012082075144695, "rouge2_recall": 0.02989642087800327, "rouge2_recall_stderr": 0.0016215511857418903, "rougeL_fmeasure": 0.106349864837288, "rougeL_fmeasure_stderr": 0.0015846051612748348, "rougeL_precision": 0.0752829050790085, "rougeL_precision_stderr": 0.0011596162782922079, "rougeL_recall": 0.18955113381130423, "rougeL_recall_stderr": 0.002835961354831114, "rougeLsum_fmeasure": 0.09420537882087585, "rougeLsum_fmeasure_stderr": 0.0014658748413253703, "rougeLsum_precision": 0.06663478234734735, "rougeLsum_precision_stderr": 0.001066868518646431, "rougeLsum_recall": 0.16847089979874588, "rougeLsum_recall_stderr": 0.0026626534377023315}}, "3": {"article_DOC_summary": {"bleu": 0.6352481576908428, "bleu_stderr": 0.07228550314335824, "rouge1_fmeasure": 0.11927879260429021, "rouge1_fmeasure_stderr": 0.0020977305010526533, "rouge1_precision": 0.08725627518416645, "rouge1_precision_stderr": 0.0017540235333849527, "rouge1_recall": 0.2065502217091037, "rouge1_recall_stderr": 0.0035051865393515855, 
"rouge2_fmeasure": 0.017198804793613687, "rouge2_fmeasure_stderr": 0.0008946401389735868, "rouge2_precision": 0.012719951156236566, "rouge2_precision_stderr": 0.0007406965606955484, "rouge2_recall": 0.029787145304670665, "rouge2_recall_stderr": 0.0015368778585668954, "rougeL_fmeasure": 0.10577166580317919, "rougeL_fmeasure_stderr": 0.0017689287079966291, "rougeL_precision": 0.07721191609461389, "rougeL_precision_stderr": 0.0014853368263540478, "rougeL_recall": 0.18418802513615165, "rougeL_recall_stderr": 0.0030256632043210233, "rougeLsum_fmeasure": 0.09377983788010003, "rougeLsum_fmeasure_stderr": 0.0016661080858413875, "rougeLsum_precision": 0.0686057369834113, "rougeLsum_precision_stderr": 0.001422941372147247, "rougeLsum_recall": 0.16359614543666398, "rougeLsum_recall_stderr": 0.002849671331740175}}, "4": {"article_DOC_summary": {"bleu": 0.300928021815898, "bleu_stderr": 0.06925636358571334, "rouge1_fmeasure": 0.034344690937185784, "rouge1_fmeasure_stderr": 0.001989633638126816, "rouge1_precision": 0.03048686866903372, "rouge1_precision_stderr": 0.002057687267513022, "rouge1_recall": 0.05350300840117225, "rouge1_recall_stderr": 0.003130762403545177, "rouge2_fmeasure": 0.004942647365129906, "rouge2_fmeasure_stderr": 0.0005595762399260895, "rouge2_precision": 0.003995899001837998, "rouge2_precision_stderr": 0.00050499691399483, "rouge2_recall": 0.008145814980496137, "rouge2_recall_stderr": 0.0009371172275023301, "rougeL_fmeasure": 0.02958035424455412, "rougeL_fmeasure_stderr": 0.0016803867000653534, "rougeL_precision": 0.025984491055271917, "rougeL_precision_stderr": 0.0016847394634243319, "rougeL_recall": 0.04659937629170288, "rougeL_recall_stderr": 0.002710020078389942, "rougeLsum_fmeasure": 0.027240573379215583, "rougeLsum_fmeasure_stderr": 0.0015817488059235681, "rougeLsum_precision": 0.024293545373462375, "rougeLsum_precision_stderr": 0.0016361709915266752, "rougeLsum_recall": 0.04256754491806482, "rougeLsum_recall_stderr": 0.0025166035120746206}}, "5": 
{"article_DOC_summary": {"bleu": 2.8887196381450717e-38, "bleu_stderr": 8.880640660739498e-33, "rouge1_fmeasure": 0.002540260392770392, "rouge1_fmeasure_stderr": 0.0006784884870699844, "rouge1_precision": 0.002805834500150757, "rouge1_precision_stderr": 0.0007612204769420191, "rouge1_recall": 0.0024013325042936823, "rouge1_recall_stderr": 0.0006357123052113102, "rouge2_fmeasure": 0.0004167222426927057, "rouge2_fmeasure_stderr": 0.0001512636536345213, "rouge2_precision": 0.0004745709032612473, "rouge2_precision_stderr": 0.00017749034421704978, "rouge2_recall": 0.00037964313856597286, "rouge2_recall_stderr": 0.0001367309522495532, "rougeL_fmeasure": 0.0018478732641861921, "rougeL_fmeasure_stderr": 0.0004841531159026887, "rougeL_precision": 0.002003747091807142, "rougeL_precision_stderr": 0.0005275557743060757, "rougeL_recall": 0.0017802029206495263, "rougeL_recall_stderr": 0.00046661766903965664, "rougeLsum_fmeasure": 0.002045339368848435, "rougeLsum_fmeasure_stderr": 0.0005490084314042811, "rougeLsum_precision": 0.002248874633359708, "rougeLsum_precision_stderr": 0.0006139815040464879, "rougeLsum_recall": 0.0019486166880306997, "rougeLsum_recall_stderr": 0.0005194547723758253}}}} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_0.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..7f67b66d59ba9366420a4f19b29bcce023d9ace9 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.23177272289200984, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.02232644342648684 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05527594145082781, + 
"dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0012465231724252742 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.2418587672030188, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004019327605144833 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08501748924707608, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016897110414027562 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.025961495442502317, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007780782480280117 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.11843347133692429, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0028540813294636257 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.040231183755865985, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0010882095141455182 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.05395710508825529, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011929395949061676 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.2379697178202554, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003961524066291127 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08314138498678003, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + 
"rougeL_fmeasure_stderr": 0.0016231446043623204 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.05337500845318596, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0011839667538989299 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.23441022719094787, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003815976303927829 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.08213023336769926, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015968743295787242 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_1.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..276cfcc3ca77ee08f5ac66d48ae1fbddfd050edf --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.15591930086532843, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.020043337299334937 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": 
"PALM_prompt", + "rouge1_precision": 0.04915696336088872, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0013719245742553428 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.22817875412187882, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0038498391014422984 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07451602193826748, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001531724832572773 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.020531505897721725, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006614769676442327 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09594413074539906, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002613650288767959 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03199910420801712, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009604719879955616 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.047883171012056816, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001343005391086916 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.22184044926729388, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0037104935930818562 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07254382626050829, + "dataset_path": 
"GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001482218914183275 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.047546859924243255, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0013419792425392316 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.22014694911717558, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036698611197668388 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07197825182983052, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014742649681246808 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_2.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..4435a18187ba633b87ca6d8122215fb946fc1fee --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.13871707140912973, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.016788036903946976 + }, + { + 
"task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.04461027579233427, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0010252458063135287 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.21811803941951569, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0037017929524820227 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.06987553531042545, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014539433830700974 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.018660136528242423, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006140753895605142 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.08755745192640091, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002430794457259159 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.02909294918208801, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0008964441540124039 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.04358571012467286, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010061709766373032 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21211372669415113, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0035758418823002756 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + 
"rougeL_fmeasure": 0.06823871063465026, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014261323213445975 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.0431816158725768, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010025065034385686 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.20966310389431556, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0035384273192345045 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.06756980792712407, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014205362992606334 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_3.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..552398a478b84f7924d460e3af2bd2e5b5379c3b --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.11690247533544619, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + 
"subset": null, + "bleu_stderr": 0.009287305842204673 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.046245688129935014, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0010696458495622631 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.22042220582796518, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0037371854746322845 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07202771447835704, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001490885359262395 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.019580176815454965, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006506841517107448 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09054710218942696, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0024953240626813225 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03035641719434565, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009321288238861425 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.04511844470650973, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010391655732875631 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21441315183041518, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0035987476288693045 + }, + { + 
"task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07027243705758501, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001452780815054275 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.044690873379109534, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010335538927214356 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.21226321516952545, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0035914422547391055 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.06958140828669782, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014430649224632791 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_4.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..cb9a8a6fd52fcce41d91919df6fc2803ddcdf8c0 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.10677700841911389, + 
"dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.010532753032062412 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.04615281662149837, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0010879113367318798 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.22148172798476662, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003759468700074842 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07157002920717628, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014616935888478546 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.019174418049251147, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006244871255048812 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09103456901778384, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0024825202804016214 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.029883470617163364, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009026590175097606 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.045119423861603725, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001073039312603156 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21556020554547964, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + 
"rougeL_recall_stderr": 0.003632709619034944 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.06993498443202883, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014395169753379103 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04476216662558973, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010644849383889573 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.21355300560358506, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0035901059049123925 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.06934638240081208, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014247023478863169 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_5.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..26d098a80f3ab36a174dc30ce4ea6bf81e5b294e --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + 
"prompt_name": "PALM_prompt", + "bleu": 0.15138575433897905, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.017900594645392207 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.04823668690787371, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0011345767818553378 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.22292464568286927, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0038456993631155742 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07429447162765516, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015383260022193645 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.020106627977607386, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006604378076018168 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.0930666896736683, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0025812913573701997 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03125224633271726, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009564038941008088 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.046926018454489425, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011014019730810139 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21666597922428132, + "dataset_path": 
"GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0037251822118275196 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07230650733729013, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014968722133129424 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04661651332318032, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0011024401738617675 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.21459919419928164, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036887425804951967 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07174191910647153, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001488758460774504 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_0.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..110e1cdbf0ddbc2591379d0320fd98c158a2ab7a --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1,133 @@ +{ + 
"results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.09447866279876174, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0016120241044285232 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.1540564186245438, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0023725318234165483 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.10832056502796245, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016461665492261007 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.012978391395703715, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00048074981881528943 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.02301810460593164, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0009956565819111785 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.015181811389840852, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005506979609486634 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.08193505277480057, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0013007255437820148 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.13628710672412833, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0020475851989507207 + }, + { + "task_name": 
"GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.09453921947632203, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0013355023556588757 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.08784439043968462, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014790920660564867 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.14407726529605594, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0022187007547342565 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.10089536393117866, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001511628829699065 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.7157135005698502, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.04647014375603136 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_1.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..c2c792cb8e33944e1636def5a3b82f172f833bec --- /dev/null +++ 
b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.1119730515754374, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015546284623857009 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.1809241968351015, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0023564572667088978 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.12776545479953536, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001583327649945923 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.012687266208996356, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00047683091570961515 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.02295689455252069, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0009715194431889655 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.015034669686256276, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005599076489425881 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.08248229363295397, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001025565708877038 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.13703985539484212, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": 
"en", + "subset": null, + "rougeL_recall_stderr": 0.0017570256829565357 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.09467857796328046, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0010413737833186599 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.10561979752507195, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014505402845061354 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.17142948921774814, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0022322231249741783 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.12061788807935782, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014744781895989557 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.7568216796206494, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.0365762458878659 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_2.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 
0000000000000000000000000000000000000000..a971bc5bc41c54870c6a6e7550e8cd0e77a31863 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.10014548191146704, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0014565924281840863 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.1575653498735645, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002075553615634516 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.113136060241949, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014392617841219371 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.008598460740984758, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0003923280052930601 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.014622841033730277, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0007520706503860998 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.009894560043561524, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00043909962900646094 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.07777296552153214, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010189852904572438 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": 
"tldr_en", + "rougeL_recall": 0.12593569249466863, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.001616788777290793 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.0885732558882455, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0010105020660668759 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.09456741339374493, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0013485738233839097 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.14953299345084117, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.001945811896778034 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.1070061210085247, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013313074012279494 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.5008074434105538, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.027420163187722234 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_3.json 
b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..bde27a176b80c28c7ceaa22a9751843b13152b95 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.08833884249120096, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0016673032171567173 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.1315357579913616, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.002286330317290917 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.09530805481842063, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015958221853691465 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.009063060451693672, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00045344650796241257 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.015727013632766268, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0009142365063516984 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.010182474205209647, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0004898627779511664 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.0721907541408395, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + 
"rougeL_precision_stderr": 0.0013116484262805982 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.10968615915706327, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0019015907007798752 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.07807114760808846, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0012320036077104104 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.08248440495112994, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.00153250206702351 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.123214740487544, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0021215127721801582 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.08904837251695515, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014644806154194177 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.6451490842782704, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.04949835258471997 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of 
file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_4.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..cb1b94b5a3fc904c6698243530f40ef20e65c899 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.03202423756442962, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0014355653077500936 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.04617716025641568, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.001862897722705631 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.032415438282286, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0012577519098217766 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.004132309151704993, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0004806369034292581 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.006544256549277832, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.000674451226759297 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.004010615388886775, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00032253874673817534 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 
0.027672839936926416, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012307971594838533 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.04061635263335589, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0016394282301373483 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.028103964819551033, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001065948161074736 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.029479508560887577, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0013229802106936625 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.04254618276718864, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0017078183730069745 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.029753940030187404, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0011391554285021324 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.19863349159696345, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.020082456809440215 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + 
"use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_5.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..3d029a241d4ec4e6a54df7bac90288c6a7cebf23 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.006559425974826442, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0007772707987241637 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.008162664783148844, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0008557272559576417 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.005982548603636205, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0006026463885272619 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.0010078505625016065, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00020525860229414494 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.0014689900879043029, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00034502461378481663 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.0008789405383568104, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 
0.00014915160142531196 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.005718323694944019, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0006701572692651454 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.007339449594823124, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.000770907895860834 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.005275238912702371, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0005250606523599148 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.006186482491790637, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0007409724280389656 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.007662196121244796, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0008003804501317619 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.005633569731566957, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0005703046910121007 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 7.258601739337029e-07, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 1.0366520457308342e-06 + } + ], + "config": { + "model": "hf-causal", + "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_0.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..27a60e08d1d5819937fbf999cbe525e4535343f4 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 2.3401189092178583, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.040276117196671284 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.1338482575359331, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0024648821332688554 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.19751057677787456, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0022572096596852336 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.14110806522366368, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.00163799349633168 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + 
"rouge2_precision": 0.03247407533046338, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0009841360244194405 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.04905120516211981, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0010857431861598476 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.03462813772314848, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0007607431713226254 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.1242011591418883, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0020399638390954226 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.19323930452458596, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0022556438726476693 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.13585027913820621, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0015569718092981529 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.10227961147329598, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.002007607729564016 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.14913014161326776, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0017880704560848586 + }, + { + 
"task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.10688711791196877, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013147429613898317 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_1.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..ba6db53166cb7e5309d31df1de6ac30d52d62d18 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 3.5992517367535997, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.12500914847960642 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.3614234345919415, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0034603741768298732 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.2669063847937934, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002337075988797385 + }, + { + "task_name": 
"e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.28101229235208536, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0020265969737045648 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.12030442505390465, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.003538445104828453 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.06897408855755997, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0013139643398713688 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.07333475675979936, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0013071931289660096 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.27329668658465245, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.003315427145096111 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.19403178278886463, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0017664752356725145 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.20524138627600222, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.001545962308793527 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.30454450005440276, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + 
"rougeLsum_precision_stderr": 0.0034144218184777 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.21993558079129125, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002032200498111229 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.2322460841505594, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0018029363292571669 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_2.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8777b6e3fbbedd3858cd37a4e2b23a568c8de6e4 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 3.5942635908606877, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.09147815934513456 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.3793434189953508, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + 
"rouge1_precision_stderr": 0.004069614391187794 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.26746437140757334, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002526704673951825 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.279394643327864, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0021951717114593317 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.1515456989078487, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.004472870355521199 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.07217770344112004, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0013474768357982336 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.0768604575160761, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0013107245087658804 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.29555872383394716, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.004057889592592142 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.19576962242521914, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.00187325384455503 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.2058924032347511, + "dataset_path": 
"e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0016251665479717697 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3245908498877826, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.004095922097833713 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.22064922758817399, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0021668246347809153 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.23146409262761347, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019069704764335866 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_3.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..b157b05cb01dccf2c8fc0fd81f0e1519c3320126 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 3.8277549889238736, + 
"dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.11727775129407206 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.411306959214004, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.004596205232308239 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.26249329655423426, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025813575811259462 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.2778774267600531, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002231862571930029 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.19162481139662949, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.005328850643116758 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.07577164851702119, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0014194426398597818 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.08217996110518319, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0013750056974164472 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.33043415470701937, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.004742365592495772 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": 
"generate_text_restaurant", + "rougeL_recall": 0.1937730893993055, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019119918138972675 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.20735400433932427, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0016689881709039034 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.35986441072271874, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.004720685076235124 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.21855414102493695, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0022133351382317944 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.23289659318871578, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019422284749949631 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_4.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 
0000000000000000000000000000000000000000..ffe72a229af9bbd8eae4395c35bcc32f3ab834f9 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 3.8730750001922387, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.08862530091808378 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.43734665127896905, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.004915431766267602 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.25825014881139846, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0026138643131703014 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.27642834239969777, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002243190375697101 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.22342493014598566, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.005855663857563032 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.0774777502850809, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0014095351417513376 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.0853395429614827, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 
0.0013678253333793263 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.35796641748717295, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0051466390298742296 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.19165634222104913, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019262622276203 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.20776551068527133, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0016556654268637695 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.38458437667991324, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.005085579108635989 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.2141255477692916, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0022188315932043135 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.23107954066521458, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.001935297756579334 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + 
"limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_5.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..cfe13f4229116015f7729b0e37e94b6abc4605df --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 3.871663587272024, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.08213288901946655 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.463975157198053, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.005172688538225158 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.25222528921228415, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025995788226328095 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.2752761295397579, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0022444379807100992 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.25251781213190033, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.006227887703719169 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.07903749837559827, + "dataset_path": "e2e_nlg_cleaned", + 
"dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.001389474253801609 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.08905416913450527, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0013768518037889515 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.38595874277972353, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.005425372277596022 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.19012858098709634, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019398139592262148 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.2100839933880261, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0016694463788081287 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.4122015013890839, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.005345816177605482 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.21127057478563274, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0022217687345829993 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.23236587517475857, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019368568086863874 + } + ], + "config": { + "model": "hf-causal", + "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_0.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..5cbee174ea5d35f44b3c955737c26ce4cdab7a1e --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.10435313975011247, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0015651354656744249 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.2555439781765851, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003558285673738991 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.14603634865959045, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0020841313366621607 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.017103812367996997, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006930620273294279 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.043341553909434255, + "dataset_path": "GEM/xsum", + "dataset_name": null, + 
"subset": "", + "rouge2_recall_stderr": 0.00176569772400685 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.024173373329113998, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0009688116374499329 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.08772513822430761, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012080239215113645 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.21657452718044604, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002873568760674157 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.12308403161540907, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.001625110608788102 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.08042448723073105, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0012210878969545749 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.19946512972695213, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0029584275829818217 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.11295811701049689, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0016569363950514997 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.8723920335368474, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.07351478331186045 + } + ], + "config": { + "model": "hf-causal", 
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_1.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..72513c0a0cbe33d7827228138849327d71a1e77a --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08747592902444107, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0014531287916684458 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.21644112261135764, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0033107140270396554 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.12299227773307847, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.001958112277088821 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.01106556247219742, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006097089149891652 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.028166209498187943, + "dataset_path": "GEM/xsum", + "dataset_name": 
null, + "subset": "", + "rouge2_recall_stderr": 0.001533311064611577 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.015710142149030784, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0008583704462574906 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07465145377891358, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0011626174667832513 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.18628849900104955, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0027360293889638074 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.1051867608217552, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0015744992567553885 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06827654871246044, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.001131987134152434 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.17105163608110932, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0026933609788779758 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09628432822719876, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015376419286776773 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.5964684071601108, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.06838453086802421 + } + ], + "config": { + "model": 
"hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_2.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..116f79fb4d50ff3b535623540a618001e66d004e --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08572791024630257, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.001384127706576123 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.21467210269736098, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003317038170555432 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.12092380897447323, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0018851371517061449 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.011439172714049112, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006012082075144695 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.02989642087800327, + "dataset_path": "GEM/xsum", + 
"dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0016215511857418903 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.016339899446017426, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0008589336952150353 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.0752829050790085, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0011596162782922079 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.18955113381130423, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002835961354831114 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.106349864837288, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0015846051612748348 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06663478234734735, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.001066868518646431 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.16847089979874588, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0026626534377023315 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09420537882087585, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0014658748413253703 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.6089988048165847, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.0603775435206676 + } + ], + "config": { + 
"model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_3.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..1d029bd45cc7ca42130661bb48f6405d6057bf49 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08725627518416645, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0017540235333849527 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.2065502217091037, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0035051865393515855 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.11927879260429021, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0020977305010526533 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.012719951156236566, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007406965606955484 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.029787145304670665, + "dataset_path": 
"GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0015368778585668954 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.017198804793613687, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0008946401389735868 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07721191609461389, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0014853368263540478 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.18418802513615165, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0030256632043210233 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.10577166580317919, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017689287079966291 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.0686057369834113, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.001422941372147247 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.16359614543666398, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002849671331740175 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09377983788010003, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0016661080858413875 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.6352481576908428, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.07228550314335824 + } + ], 
+ "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_4.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..d3f2d008852a52c7b6d17e69cc5e874588982d69 --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.03048686866903372, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.002057687267513022 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.05350300840117225, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003130762403545177 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.034344690937185784, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.001989633638126816 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.003995899001837998, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.00050499691399483 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.008145814980496137, + 
"dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0009371172275023301 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.004942647365129906, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0005595762399260895 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.025984491055271917, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0016847394634243319 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.04659937629170288, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002710020078389942 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.02958035424455412, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0016803867000653534 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.024293545373462375, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0016361709915266752 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.04256754491806482, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0025166035120746206 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.027240573379215583, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015817488059235681 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.300928021815898, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 
0.06925636358571334 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_5.json b/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..b726bdaa2e1f564b021b2e9e3b9f6a3c03c1890d --- /dev/null +++ b/421m3b91b5/evaluation/generation/slim.421m3b91b5_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.002805834500150757, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0007612204769420191 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.0024013325042936823, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0006357123052113102 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.002540260392770392, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0006784884870699844 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.0004745709032612473, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.00017749034421704978 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + 
"rouge2_recall": 0.00037964313856597286, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0001367309522495532 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.0004167222426927057, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0001512636536345213 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.002003747091807142, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0005275557743060757 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.0017802029206495263, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.00046661766903965664 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.0018478732641861921, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0004841531159026887 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.002248874633359708, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0006139815040464879 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.0019486166880306997, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0005194547723758253 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.002045339368848435, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0005490084314042811 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 2.8887196381450717e-38, + "dataset_path": "GEM/xsum", + 
"dataset_name": null, + "subset": "", + "bleu_stderr": 8.880640660739498e-33 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b91b5/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/rankeval/421m3b91b5_0.csv b/421m3b91b5/evaluation/rankeval/421m3b91b5_0.csv new file mode 100644 index 0000000000000000000000000000000000000000..85987182a3acdba70b93b9f0d004f233ee94ed67 --- /dev/null +++ b/421m3b91b5/evaluation/rankeval/421m3b91b5_0.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.329,0.01486539538592837,0 +anli_r2,acc,0.327,0.014842213153411245,0 +anli_r3,acc,0.3416666666666667,0.013696658778002514,0 +arc_challenge,acc,0.1825938566552901,0.011289730684565,0 +arc_challenge,acc_norm,0.23122866894197952,0.012320858834772271,0 +arc_easy,acc,0.3939393939393939,0.010026305355981823,0 +arc_easy,acc_norm,0.3640572390572391,0.009873293392779117,0 +boolq,acc,0.5571865443425077,0.008687668766930827,1 +cb,acc,0.3392857142857143,0.06384226561930825,1 +cb,f1,0.23669467787114848,,1 +copa,acc,0.61,0.04902071300001975,0 +hellaswag,acc,0.2815176259709221,0.004488201756642577,0 +hellaswag,acc_norm,0.2954590718980283,0.0045531640133795554,0 +piqa,acc,0.6229597388465724,0.011307569752543904,0 +piqa,acc_norm,0.6278563656147987,0.011277968313592745,0 +rte,acc,0.5270758122743683,0.030052303463143706,0 +sciq,acc,0.66,0.014987482264363937,0 +sciq,acc_norm,0.57,0.015663503610155276,0 +storycloze_2016,acc,0.5681453768038482,0.01145454181271244,0 +winogrande,acc,0.5067087608524072,0.014051220692330349,0 diff --git 
a/421m3b91b5/evaluation/rankeval/421m3b91b5_0.json b/421m3b91b5/evaluation/rankeval/421m3b91b5_0.json new file mode 100644 index 0000000000000000000000000000000000000000..ed544a4ebeb4c9fc6144d36c0af7d5d469ffed42 --- /dev/null +++ b/421m3b91b5/evaluation/rankeval/421m3b91b5_0.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.329, + "acc_stderr": 0.01486539538592837 + }, + "anli_r2": { + "acc": 0.327, + "acc_stderr": 0.014842213153411245 + }, + "anli_r3": { + "acc": 0.3416666666666667, + "acc_stderr": 0.013696658778002514 + }, + "cb": { + "acc": 0.3392857142857143, + "acc_stderr": 0.06384226561930825, + "f1": 0.23669467787114848 + }, + "copa": { + "acc": 0.61, + "acc_stderr": 0.04902071300001975 + }, + "hellaswag": { + "acc": 0.2815176259709221, + "acc_stderr": 0.004488201756642577, + "acc_norm": 0.2954590718980283, + "acc_norm_stderr": 0.0045531640133795554 + }, + "rte": { + "acc": 0.5270758122743683, + "acc_stderr": 0.030052303463143706 + }, + "winogrande": { + "acc": 0.5067087608524072, + "acc_stderr": 0.014051220692330349 + }, + "storycloze_2016": { + "acc": 0.5681453768038482, + "acc_stderr": 0.01145454181271244 + }, + "boolq": { + "acc": 0.5571865443425077, + "acc_stderr": 0.008687668766930827 + }, + "arc_easy": { + "acc": 0.3939393939393939, + "acc_stderr": 0.010026305355981823, + "acc_norm": 0.3640572390572391, + "acc_norm_stderr": 0.009873293392779117 + }, + "arc_challenge": { + "acc": 0.1825938566552901, + "acc_stderr": 0.011289730684565, + "acc_norm": 0.23122866894197952, + "acc_norm_stderr": 0.012320858834772271 + }, + "sciq": { + "acc": 0.66, + "acc_stderr": 0.014987482264363937, + "acc_norm": 0.57, + "acc_norm_stderr": 0.015663503610155276 + }, + "piqa": { + "acc": 0.6229597388465724, + "acc_stderr": 0.011307569752543904, + "acc_norm": 0.6278563656147987, + "acc_norm_stderr": 0.011277968313592745 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 
0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/rankeval/421m3b91b5_1.csv b/421m3b91b5/evaluation/rankeval/421m3b91b5_1.csv new file mode 100644 index 0000000000000000000000000000000000000000..e35d2e07c66f1e5878205dfb63bc08e4e42fb7ea --- /dev/null +++ b/421m3b91b5/evaluation/rankeval/421m3b91b5_1.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.319,0.014746404865473486,0 +anli_r2,acc,0.323,0.014794927843348642,0 +anli_r3,acc,0.33916666666666667,0.013672343491681829,0 +arc_challenge,acc,0.17491467576791808,0.011101562501828229,0 +arc_challenge,acc_norm,0.2226962457337884,0.01215831477482994,0 +arc_easy,acc,0.38636363636363635,0.009991296778159624,0 +arc_easy,acc_norm,0.3611111111111111,0.009856013425811244,0 +boolq,acc,0.5168195718654435,0.008740105658763948,1 +cb,acc,0.5,0.06741998624632421,1 +cb,f1,0.34491725768321513,,1 +copa,acc,0.61,0.04902071300001975,0 +hellaswag,acc,0.2818163712407887,0.004489648865080887,0 +hellaswag,acc_norm,0.2925712009559849,0.004540134005060328,0 +piqa,acc,0.6262241566920566,0.01128797256320102,0 +piqa,acc_norm,0.6224156692056583,0.011310782787145776,0 +rte,acc,0.5342960288808665,0.030025579819366422,0 +sciq,acc,0.665,0.014933117490932572,0 +sciq,acc_norm,0.604,0.015473313265859405,0 +storycloze_2016,acc,0.5633351149118119,0.01146929551844221,0 +winogrande,acc,0.5185477505919495,0.014042813708888378,0 diff --git a/421m3b91b5/evaluation/rankeval/421m3b91b5_1.json b/421m3b91b5/evaluation/rankeval/421m3b91b5_1.json new file mode 100644 index 0000000000000000000000000000000000000000..0e9b90cc78a3d0848b94a774688042e903030cce --- /dev/null +++ b/421m3b91b5/evaluation/rankeval/421m3b91b5_1.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.319, + "acc_stderr": 0.014746404865473486 + }, + "anli_r2": { + "acc": 0.323, + "acc_stderr": 0.014794927843348642 + }, + "anli_r3": { + "acc": 
0.33916666666666667, + "acc_stderr": 0.013672343491681829 + }, + "cb": { + "acc": 0.5, + "acc_stderr": 0.06741998624632421, + "f1": 0.34491725768321513 + }, + "copa": { + "acc": 0.61, + "acc_stderr": 0.04902071300001975 + }, + "hellaswag": { + "acc": 0.2818163712407887, + "acc_stderr": 0.004489648865080887, + "acc_norm": 0.2925712009559849, + "acc_norm_stderr": 0.004540134005060328 + }, + "rte": { + "acc": 0.5342960288808665, + "acc_stderr": 0.030025579819366422 + }, + "winogrande": { + "acc": 0.5185477505919495, + "acc_stderr": 0.014042813708888378 + }, + "storycloze_2016": { + "acc": 0.5633351149118119, + "acc_stderr": 0.01146929551844221 + }, + "boolq": { + "acc": 0.5168195718654435, + "acc_stderr": 0.008740105658763948 + }, + "arc_easy": { + "acc": 0.38636363636363635, + "acc_stderr": 0.009991296778159624, + "acc_norm": 0.3611111111111111, + "acc_norm_stderr": 0.009856013425811244 + }, + "arc_challenge": { + "acc": 0.17491467576791808, + "acc_stderr": 0.011101562501828229, + "acc_norm": 0.2226962457337884, + "acc_norm_stderr": 0.01215831477482994 + }, + "sciq": { + "acc": 0.665, + "acc_stderr": 0.014933117490932572, + "acc_norm": 0.604, + "acc_norm_stderr": 0.015473313265859405 + }, + "piqa": { + "acc": 0.6262241566920566, + "acc_stderr": 0.01128797256320102, + "acc_norm": 0.6224156692056583, + "acc_norm_stderr": 0.011310782787145776 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/rankeval/421m3b91b5_2.csv b/421m3b91b5/evaluation/rankeval/421m3b91b5_2.csv new file mode 100644 index 0000000000000000000000000000000000000000..be87093b158fe42327428dd8ce1663cd1aec3aef --- /dev/null +++ b/421m3b91b5/evaluation/rankeval/421m3b91b5_2.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version 
+anli_r1,acc,0.326,0.01483050720454104,0 +anli_r2,acc,0.346,0.015050266127564431,0 +anli_r3,acc,0.3425,0.013704669762934727,0 +arc_challenge,acc,0.17918088737201365,0.011207045216615663,0 +arc_challenge,acc_norm,0.22184300341296928,0.01214165906814789,0 +arc_easy,acc,0.37836700336700335,0.009951575683331952,0 +arc_easy,acc_norm,0.35942760942760943,0.009845958893373771,0 +boolq,acc,0.5247706422018349,0.00873431671938778,1 +cb,acc,0.4642857142857143,0.06724777654937658,1 +cb,f1,0.305982905982906,,1 +copa,acc,0.6,0.049236596391733084,0 +hellaswag,acc,0.27922724556861184,0.00447702576220061,0 +hellaswag,acc_norm,0.29267078271260705,0.004540586983229994,0 +piqa,acc,0.6245919477693145,0.01129783958977666,0 +piqa,acc_norm,0.6251360174102285,0.011294565805619015,0 +rte,acc,0.49458483754512633,0.030094698123239966,0 +sciq,acc,0.649,0.015100563798316405,0 +sciq,acc_norm,0.59,0.015560917136921669,0 +storycloze_2016,acc,0.5595938001068947,0.01148001236720782,0 +winogrande,acc,0.5146014206787688,0.014046492383275832,0 diff --git a/421m3b91b5/evaluation/rankeval/421m3b91b5_2.json b/421m3b91b5/evaluation/rankeval/421m3b91b5_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8aaef90401faf3e38691820c2161961f5c95b981 --- /dev/null +++ b/421m3b91b5/evaluation/rankeval/421m3b91b5_2.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.326, + "acc_stderr": 0.01483050720454104 + }, + "anli_r2": { + "acc": 0.346, + "acc_stderr": 0.015050266127564431 + }, + "anli_r3": { + "acc": 0.3425, + "acc_stderr": 0.013704669762934727 + }, + "cb": { + "acc": 0.4642857142857143, + "acc_stderr": 0.06724777654937658, + "f1": 0.305982905982906 + }, + "copa": { + "acc": 0.6, + "acc_stderr": 0.049236596391733084 + }, + "hellaswag": { + "acc": 0.27922724556861184, + "acc_stderr": 0.00447702576220061, + "acc_norm": 0.29267078271260705, + "acc_norm_stderr": 0.004540586983229994 + }, + "rte": { + "acc": 0.49458483754512633, + "acc_stderr": 0.030094698123239966 + }, + 
"winogrande": { + "acc": 0.5146014206787688, + "acc_stderr": 0.014046492383275832 + }, + "storycloze_2016": { + "acc": 0.5595938001068947, + "acc_stderr": 0.01148001236720782 + }, + "boolq": { + "acc": 0.5247706422018349, + "acc_stderr": 0.00873431671938778 + }, + "arc_easy": { + "acc": 0.37836700336700335, + "acc_stderr": 0.009951575683331952, + "acc_norm": 0.35942760942760943, + "acc_norm_stderr": 0.009845958893373771 + }, + "arc_challenge": { + "acc": 0.17918088737201365, + "acc_stderr": 0.011207045216615663, + "acc_norm": 0.22184300341296928, + "acc_norm_stderr": 0.01214165906814789 + }, + "sciq": { + "acc": 0.649, + "acc_stderr": 0.015100563798316405, + "acc_norm": 0.59, + "acc_norm_stderr": 0.015560917136921669 + }, + "piqa": { + "acc": 0.6245919477693145, + "acc_stderr": 0.01129783958977666, + "acc_norm": 0.6251360174102285, + "acc_norm_stderr": 0.011294565805619015 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/rankeval/421m3b91b5_3.csv b/421m3b91b5/evaluation/rankeval/421m3b91b5_3.csv new file mode 100644 index 0000000000000000000000000000000000000000..0991e2471a31e5f785098fb06552daa53b143dbf --- /dev/null +++ b/421m3b91b5/evaluation/rankeval/421m3b91b5_3.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.311,0.014645596385722692,0 +anli_r2,acc,0.368,0.015258073561521802,0 +anli_r3,acc,0.32416666666666666,0.013517438120881624,0 +arc_challenge,acc,0.18600682593856654,0.011370940183266726,0 +arc_challenge,acc_norm,0.21331058020477817,0.011970971742326334,0 +arc_easy,acc,0.3771043771043771,0.00994504194636652,0 +arc_easy,acc_norm,0.3531144781144781,0.009807078935467617,0 +boolq,acc,0.5201834862385321,0.008737927070893482,1 +cb,acc,0.4642857142857143,0.0672477765493766,1 
+cb,f1,0.3116883116883116,,1 +copa,acc,0.6,0.049236596391733084,0 +hellaswag,acc,0.28161720772754434,0.004488684397979513,0 +hellaswag,acc_norm,0.29047998406691894,0.004530560646902538,0 +piqa,acc,0.6338411316648531,0.011240106070308458,0 +piqa,acc_norm,0.6169749727965179,0.011342081709082845,0 +rte,acc,0.47653429602888087,0.03006330041190266,0 +sciq,acc,0.636,0.015222868840522024,0 +sciq,acc_norm,0.606,0.015459721957493384,0 +storycloze_2016,acc,0.5617316942811331,0.01147396956148814,0 +winogrande,acc,0.5059194948697711,0.014051500838485807,0 diff --git a/421m3b91b5/evaluation/rankeval/421m3b91b5_3.json b/421m3b91b5/evaluation/rankeval/421m3b91b5_3.json new file mode 100644 index 0000000000000000000000000000000000000000..633d943b18fc72b148be3dfba05037cc04630001 --- /dev/null +++ b/421m3b91b5/evaluation/rankeval/421m3b91b5_3.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.311, + "acc_stderr": 0.014645596385722692 + }, + "anli_r2": { + "acc": 0.368, + "acc_stderr": 0.015258073561521802 + }, + "anli_r3": { + "acc": 0.32416666666666666, + "acc_stderr": 0.013517438120881624 + }, + "cb": { + "acc": 0.4642857142857143, + "acc_stderr": 0.0672477765493766, + "f1": 0.3116883116883116 + }, + "copa": { + "acc": 0.6, + "acc_stderr": 0.049236596391733084 + }, + "hellaswag": { + "acc": 0.28161720772754434, + "acc_stderr": 0.004488684397979513, + "acc_norm": 0.29047998406691894, + "acc_norm_stderr": 0.004530560646902538 + }, + "rte": { + "acc": 0.47653429602888087, + "acc_stderr": 0.03006330041190266 + }, + "winogrande": { + "acc": 0.5059194948697711, + "acc_stderr": 0.014051500838485807 + }, + "storycloze_2016": { + "acc": 0.5617316942811331, + "acc_stderr": 0.01147396956148814 + }, + "boolq": { + "acc": 0.5201834862385321, + "acc_stderr": 0.008737927070893482 + }, + "arc_easy": { + "acc": 0.3771043771043771, + "acc_stderr": 0.00994504194636652, + "acc_norm": 0.3531144781144781, + "acc_norm_stderr": 0.009807078935467617 + }, + "arc_challenge": { + "acc": 
0.18600682593856654, + "acc_stderr": 0.011370940183266726, + "acc_norm": 0.21331058020477817, + "acc_norm_stderr": 0.011970971742326334 + }, + "sciq": { + "acc": 0.636, + "acc_stderr": 0.015222868840522024, + "acc_norm": 0.606, + "acc_norm_stderr": 0.015459721957493384 + }, + "piqa": { + "acc": 0.6338411316648531, + "acc_stderr": 0.011240106070308458, + "acc_norm": 0.6169749727965179, + "acc_norm_stderr": 0.011342081709082845 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/rankeval/421m3b91b5_4.csv b/421m3b91b5/evaluation/rankeval/421m3b91b5_4.csv new file mode 100644 index 0000000000000000000000000000000000000000..ba915cc4ed0ac689212e1b0eb2fe9520099db0da --- /dev/null +++ b/421m3b91b5/evaluation/rankeval/421m3b91b5_4.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.333,0.01491084616422987,0 +anli_r2,acc,0.342,0.015008706182121728,0 +anli_r3,acc,0.3333333333333333,0.013613950010225601,0 +arc_challenge,acc,0.16638225255972697,0.010883248065964145,0 +arc_challenge,acc_norm,0.21928327645051193,0.012091245787615746,0 +arc_easy,acc,0.37163299663299665,0.009915897123658785,0 +arc_easy,acc_norm,0.3514309764309764,0.00979639558281772,0 +boolq,acc,0.5226299694189602,0.008736093428015821,1 +cb,acc,0.5178571428571429,0.06737697508644647,1 +cb,f1,0.34900497512437806,,1 +copa,acc,0.63,0.04852365870939099,0 +hellaswag,acc,0.28052180840470026,0.004483360370140574,0 +hellaswag,acc_norm,0.2887870942043418,0.004522725412556963,0 +piqa,acc,0.6294885745375408,0.011267826475447665,0 +piqa,acc_norm,0.6088139281828074,0.0113862156067287,0 +rte,acc,0.48736462093862815,0.030086851767188564,0 +sciq,acc,0.652,0.01507060460376841,0 +sciq,acc_norm,0.613,0.015410011955493933,0 
+storycloze_2016,acc,0.564404061998931,0.011466111817562836,0 +winogrande,acc,0.5201262825572218,0.014041096664344334,0 diff --git a/421m3b91b5/evaluation/rankeval/421m3b91b5_4.json b/421m3b91b5/evaluation/rankeval/421m3b91b5_4.json new file mode 100644 index 0000000000000000000000000000000000000000..ac92c768127227a93f3fa87dc100cfafa3320c68 --- /dev/null +++ b/421m3b91b5/evaluation/rankeval/421m3b91b5_4.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.333, + "acc_stderr": 0.01491084616422987 + }, + "anli_r2": { + "acc": 0.342, + "acc_stderr": 0.015008706182121728 + }, + "anli_r3": { + "acc": 0.3333333333333333, + "acc_stderr": 0.013613950010225601 + }, + "cb": { + "acc": 0.5178571428571429, + "acc_stderr": 0.06737697508644647, + "f1": 0.34900497512437806 + }, + "copa": { + "acc": 0.63, + "acc_stderr": 0.04852365870939099 + }, + "hellaswag": { + "acc": 0.28052180840470026, + "acc_stderr": 0.004483360370140574, + "acc_norm": 0.2887870942043418, + "acc_norm_stderr": 0.004522725412556963 + }, + "rte": { + "acc": 0.48736462093862815, + "acc_stderr": 0.030086851767188564 + }, + "winogrande": { + "acc": 0.5201262825572218, + "acc_stderr": 0.014041096664344334 + }, + "storycloze_2016": { + "acc": 0.564404061998931, + "acc_stderr": 0.011466111817562836 + }, + "boolq": { + "acc": 0.5226299694189602, + "acc_stderr": 0.008736093428015821 + }, + "arc_easy": { + "acc": 0.37163299663299665, + "acc_stderr": 0.009915897123658785, + "acc_norm": 0.3514309764309764, + "acc_norm_stderr": 0.00979639558281772 + }, + "arc_challenge": { + "acc": 0.16638225255972697, + "acc_stderr": 0.010883248065964145, + "acc_norm": 0.21928327645051193, + "acc_norm_stderr": 0.012091245787615746 + }, + "sciq": { + "acc": 0.652, + "acc_stderr": 0.01507060460376841, + "acc_norm": 0.613, + "acc_norm_stderr": 0.015410011955493933 + }, + "piqa": { + "acc": 0.6294885745375408, + "acc_stderr": 0.011267826475447665, + "acc_norm": 0.6088139281828074, + "acc_norm_stderr": 0.0113862156067287 + } + }, 
+ "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/421m3b91b5/evaluation/rankeval/421m3b91b5_5.csv b/421m3b91b5/evaluation/rankeval/421m3b91b5_5.csv new file mode 100644 index 0000000000000000000000000000000000000000..8f780fffb787a08fbeebdb1f040b12403f75decb --- /dev/null +++ b/421m3b91b5/evaluation/rankeval/421m3b91b5_5.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.345,0.015039986742055237,0 +anli_r2,acc,0.356,0.015149042659306623,0 +anli_r3,acc,0.35,0.013774667009018554,0 +arc_challenge,acc,0.1689419795221843,0.01094979565248503,0 +arc_challenge,acc_norm,0.21928327645051193,0.012091245787615735,0 +arc_easy,acc,0.3808922558922559,0.009964428212260384,0 +arc_easy,acc_norm,0.3573232323232323,0.009833205612463123,0 +boolq,acc,0.5155963302752293,0.008740799550176545,1 +cb,acc,0.42857142857142855,0.06672848092813058,1 +cb,f1,0.27238805970149255,,1 +copa,acc,0.59,0.04943110704237102,0 +hellaswag,acc,0.2818163712407887,0.0044896488650808895,0 +hellaswag,acc_norm,0.29177454690300736,0.004536500714147988,0 +piqa,acc,0.6207834602829162,0.011320331012905072,0 +piqa,acc_norm,0.6169749727965179,0.011342081709082843,0 +rte,acc,0.516245487364621,0.030080573208738064,0 +sciq,acc,0.64,0.015186527932040124,0 +sciq,acc_norm,0.605,0.015466551464829342,0 +storycloze_2016,acc,0.5649385355424906,0.011464499648626923,0 +winogrande,acc,0.5153906866614049,0.014045826789783672,0 diff --git a/421m3b91b5/evaluation/rankeval/421m3b91b5_5.json b/421m3b91b5/evaluation/rankeval/421m3b91b5_5.json new file mode 100644 index 0000000000000000000000000000000000000000..5d5abcfef31404afed96c501fab83126a73d3b35 --- /dev/null +++ b/421m3b91b5/evaluation/rankeval/421m3b91b5_5.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.345, + 
"acc_stderr": 0.015039986742055237 + }, + "anli_r2": { + "acc": 0.356, + "acc_stderr": 0.015149042659306623 + }, + "anli_r3": { + "acc": 0.35, + "acc_stderr": 0.013774667009018554 + }, + "cb": { + "acc": 0.42857142857142855, + "acc_stderr": 0.06672848092813058, + "f1": 0.27238805970149255 + }, + "copa": { + "acc": 0.59, + "acc_stderr": 0.04943110704237102 + }, + "hellaswag": { + "acc": 0.2818163712407887, + "acc_stderr": 0.0044896488650808895, + "acc_norm": 0.29177454690300736, + "acc_norm_stderr": 0.004536500714147988 + }, + "rte": { + "acc": 0.516245487364621, + "acc_stderr": 0.030080573208738064 + }, + "winogrande": { + "acc": 0.5153906866614049, + "acc_stderr": 0.014045826789783672 + }, + "storycloze_2016": { + "acc": 0.5649385355424906, + "acc_stderr": 0.011464499648626923 + }, + "boolq": { + "acc": 0.5155963302752293, + "acc_stderr": 0.008740799550176545 + }, + "arc_easy": { + "acc": 0.3808922558922559, + "acc_stderr": 0.009964428212260384, + "acc_norm": 0.3573232323232323, + "acc_norm_stderr": 0.009833205612463123 + }, + "arc_challenge": { + "acc": 0.1689419795221843, + "acc_stderr": 0.01094979565248503, + "acc_norm": 0.21928327645051193, + "acc_norm_stderr": 0.012091245787615735 + }, + "sciq": { + "acc": 0.64, + "acc_stderr": 0.015186527932040124, + "acc_norm": 0.605, + "acc_norm_stderr": 0.015466551464829342 + }, + "piqa": { + "acc": 0.6207834602829162, + "acc_stderr": 0.011320331012905072, + "acc_norm": 0.6169749727965179, + "acc_norm_stderr": 0.011342081709082843 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..759123ea42b01bdbce22413430279a3ada420be9 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad2d4e619ed579e4c54c35b15071398c85aae310213f93c46286c1881e52c68 +size 78980887 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f0127249cbfe91cab04f036ac771b284ac603ac --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4864376de4b140dce098b5e1000d4b651a6ae3d6ca5383fbe2ae62a5e49931bd +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fcc737f118c06774ff2746e72231185013c07c0 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5de159f4a44b26586103a1e978af10ad673fb27c9d8ae9775c1081ebea31ada3 +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..09dfb1775d88632363c7c4227c63cb6af414b026 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df295aa9a1a324c075ddfa733dc9a2b8431f51344b4577ccfbc3f2e7cf11630c +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt 
b/421m3b91b5/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..95612ccc817b6b705b2a5024dfba00a35563dabc --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628aacb05dee23a78b11bf5d726227c0637ce92e4721d94d127f6154c67b143e +size 78980898 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..50b108e5461bf26c5f44cbf886c7b75658a87205 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:321f111b9b297cf546205147c748676e55ba29a42a50aebcfc52187ccfdc7be8 +size 78980898 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f29dfad5adabdd8f777b95ddf4402d9972f49735 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f250b35d0b929f6be36efab031c77671ebfbe06ca5d209f8f756ed9209572d0 +size 78980898 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..500ea22553f680730ed5123ad04b744a692843f0 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17cba00ee363020eaa981f223823703ddf42a630a4123883790624cb02d2376a +size 78980834 
diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..99aae7c58ceb0b4c5a1362c28849c8ff560efbab --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b3cf48db33fa77fb29f3265dc12b24ec9c09ba38c52eb0514e0a3649a04fd4 +size 78981090 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..07e2a60b32ba4280f83a840b7a74454f60d7792f --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc987e126ae536a45bda760e054c57c30e537539a27fbfbe0f1932c396bcd86b +size 78981026 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c6bd8decbf0ab54ae09ed4624dfdb92d4184103 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b28070e2448982773729903e8ff8a27f5691c0b0ac031fb7b5adadc5fe36ff76 +size 78981026 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c232c8e7f5026a94cd27b521379e7a905c5b3b22 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1d2b840a5244a92355740533d7fae38c3a9538977541942b50591578d092f91a +size 78980951 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..497c46fff4eb096620d2fd4fd927f59a24065219 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b57fa10419f14841c680e86ef55a7a93cd6e45c35d40c8afe9d94219657fbd54 +size 78980834 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffbd0a4947bff5669cb3b724ccdbcc07e63878d1 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8c75e06cf673375f5db1d1b3cbc956d638497d5cd9b8c58f9713e2a82123af9 +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..77875bccdc2d7155ac192d2093cc5536b840b7a5 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:093119e271a143e317e78832b4f04f1027fd2801a04398c28dc3af11d2bc84b3 +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7587a57ed9962fbde02d67971045597e77cb0e27 --- /dev/null +++ 
b/421m3b91b5/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b0f78f9f0fbadcd8b1d6705e7cd95f22d00d7eecebd90623036964f3b862c8b +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb78ded98fa1050d4d1145af45bd2340cb25fab2 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b39a19f6c8e370a898ecf108c63e84f59965bcd9153574f2a92ebc208d40c07 +size 78981026 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c9223cfa21e13588cedbbe8090946ab7e355009 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b37a625ae9e89926d40e50a0104f8c6e2b797e3945d2603b589da80de324a7 +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa0d83ea60557340fc9dc2b8bedcae8e3071388f --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adea3d42a2d1738b729bb98b98a7147321bfce69affc387fe37927b8a1ffc2eb +size 78981026 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..f281993e80da8e59ca69fb81fda19d97cbc17fe1 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:977a44f9e72fa8f0e3c5f5ab440193fb569b4c316b088623550b0b2f6e730f36 +size 78980834 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f115a0c47d0ba92f995eef710bddb07778cf24e7 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2341e9d9021dd4bc4ad301f9204c6d7091839459996618944e0270ff918fea00 +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a194dbb8f3d555678a4f9215bbd73838b0dccc18 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed16927cc3e58b6a7cf4a359b23b90091f4271b05611b321c82742bc5c50e9a +size 78981026 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..773d882b797161d115620f7d5fc249f9d5ad9469 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02984b23e9e8a42a59c411c97aba3691b23dd70820e87b945004c46816fad4f0 +size 78980887 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt 
b/421m3b91b5/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..79a80d3d1875512c72ab4614a0c5f359c2191760 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6a5d77dc10bf1edf5ab03268ee237debfe596f56326781617be093ca0c6f548 +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3fd4c5403f13614477f76c210c2b8fb0fa156a3b --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b21d0a492bf56b808e00e8e4fd1b939f7499c07b379b1fc6e628ab4298a6ad +size 78980898 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..384ae83bd944754eff227bff65aa7f78bb07d4fa --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4452b11b25a24d40271fd4bed9ec66fcb541c4ad7839fe857d306894ca00e95e +size 78980834 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d50b44a61eca08d7a9f2370fe885b26d35bf234 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e38daadb483029e50902346415e9780efd3399f54dd7c74cbd2c905c35402cca +size 78981090 
diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea1b77fd8d8196825df3bcc806b5410cbaf1d713 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c3720d359f452d4beb45bfcde0588a5bdcdb8beb0f79dea28c3be7734af8b90 +size 78980834 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b25f825c606240cd7a2f7912996147dc7c4b883 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a26fa2a8556292df3f4c98e37bcdf5b9a6f49ad1ae993d53fc73906219eb91 +size 78981026 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..64dc64afa55bc6665eef03ce3a2155555c3f532e --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c87b0fdbb5727436851897e7621c966d910b8e5a95e28406ddc0e9e50eb2cc66 +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d05e7d02129c4b8dcffcc4c39e076851185b521a --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:49d391572f86ab2eb038a3d389e7527b23f7b639cfdc36ef2dc90e6e42ae1d4f +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..22044c0ea9d1a18f4010998cb4dad701a425379a --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e032520907d6a4a33a79ae48e3261f1943012b54e5179cb03a60baaab0972a9c +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec419ce8f25c2074bfe9832143c3000f789df3e7 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2ed1ee494348ac2f8eeb7c55be3d969468f1b6434bec5fba5da970f851c2b9e +size 78980898 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c6c298f40c142ceb6a6724da6c8103508753811 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11dd971c4e6daeaf152af8359f2df1650a9178998868e68be19fed7e0825faf1 +size 78980887 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ac9fc29b042178a8cacc3f9637c207e1eef8226 --- /dev/null +++ 
b/421m3b91b5/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8af6bbd16653de24b262cc82cf883d957a211667dbe9d66caa63f3f833b3454f +size 78981090 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..13f594a0c1533f4048e8c5398b374de043e88c2f --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f06315aa9c1a94ed241db7005c80d12ab0646156a0e2c391701533148a741d +size 78980834 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..47d7ff2526e796015e17807bf13346573da98a72 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b524456d49e3102a73edf53167264467c7d1f00a5d40650a3475e74d3f116b44 +size 78981026 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fcaf5ee6e5bd68a4d8e0f0485c8d350049eed0b5 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c367fe0dc145b2de12ceec716f2bd3461d2fb1f31252c695e82f35fc4e5151bb +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..95ad019b2fb7d65ad39c444c8e9f68337b4c6996 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f43b9ce68a24b27474ed798135b86efe58dffc22fc8253e74c22f21d00562170 +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d34f87de4fb7c13dbac79fda603c2cee2aeddc1b --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62371bc2f229c2192bf8c8fdbc08d03163e743c14699c46b71d7a6e1d1e8c5f3 +size 78981090 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd9099532706405792822533b6e7a413a8c96123 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9ba6658a8cc1ab35902b6a73e0bf4d541cccaaf606eccc5577c658fd539c18 +size 78980898 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c65cacb3bb5b691c5ef5fad85823cf2361ef0432 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45f57425b2311cfe2290bf0102974bad778503b67d1bec20235c8f4899a67ad7 +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt 
b/421m3b91b5/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..af9f4f7681f28bd2ee63914fa2f080a4e7501e9a --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd4aed9f7abfaff39bd3c5648ee6ab2bd9464467dbcdb7fb81eda4906de77393 +size 78980770 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..98c4982275bd21586f58e3a6d01ff94452ab4a4c --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e239e6a8b52d8d7a486e5ac057dbe0725380f0044f1ad787b5ada7365ff15fad +size 78981026 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b1838418ffcc936a504a90cf5078ae20bee9dc9 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2270d24f39dc867e3dee2d59ca6bfc21a9032d1c665fa3795b1bb5bc1c10f8eb +size 78980951 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc8b10f1ea675f6a682dd9959ac6e655981eb29a --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6f74a5a969d4dd2d864ba748db5692441bd37c06daba3cabefe6c4cefb2624a +size 78980962 
diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..23ca673a3ffddb47398b512994a05d80b44e7d35 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d33cd7aa7e72cff9d9f0173c5bece7a0a83226970a919c7819295e9d562aa28 +size 78981026 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6640739d971ce48217c765536428ab24a94b51e7 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6de4e7333aa818662220235adb5d229703f7242ffdff17e289a55ef1f0d95ec +size 78980898 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd86d912212890d08cc6b1f62420e20411b5677f --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37d115678f16e0b957ad96ce57a3240fe85f71c394ab31406cd98ecbe4432d41 +size 78980898 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9e85a1b2b3e4ca72aecd746249fabdc92097eed --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:9cd7753b50d21d1b9418b2351739f52463620b98dc09bdc01240d02ab3099537 +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd48700056f161032534a62d3f0b2b05a5f7e28c --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bebcdbad6e98526589dc037410de8af7ab931332a2681e4365a01255962f9330 +size 78980898 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..789082630db4c472efc7cf49c7ae2eea08d7fb0b --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0efc979aa0ba3d730ee3991abd38fd77c9eab35e3c1bd961c3dd6f797a0091a7 +size 78981090 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4f57715113ccf32cef8353bf4255a30b29030bd --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e5dc0d7235b11f5d0b22ceedceef645defd5b681880052d8361e0392e1d902c +size 78980898 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c89630a3e4781c884066b228bc9b7726129e22b --- /dev/null +++ 
b/421m3b91b5/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e609856928f16e6d2fb43344e04304caeb4855aeb6c7fcebe86a922a53400f04 +size 78981026 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e3ccf9c4af6375f582fd5ca263caaa02e75c9ed --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff994131fb13b2f3e612c466767d78990d095b0696df9fd15a9238bd6f3a814a +size 78980898 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..568baf6b28241cd86e813e2b33ccb9ed773d7959 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c25b0d9156e2c08a62f5c196d13dc72656bd93983d3e1d1b0368ca4b67803afc +size 78980887 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e91f9d81895fff3c93a2404f8936113d5bfe5ffb --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27fedecde63a192d72b960ea039ef66912b002506583cef34bda18609298a71c +size 78980962 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..36801a99f36a64ee6d06a012173f50c7d2119c7f --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2371430abfccfffcd8c32baf360a2cbbd8e56cdc4d76cc7c954559dd61e58d2a +size 78981090 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fb055c81cb2bca67b4b72531b0242d7b204d82c --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:786b2285f703a86a37afc1f8f8f8d8e7a511a11a5d38eea27993e827a4d9fd25 +size 78980834 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b55cdb59c17868e68c88e10decdbff95ab74aae1 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cb6b23d4cdbd8de7b5ac1402ddbfc40fe8d4a4668dc9b9ce562244ab637b56b +size 78980898 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5b6ef571f333b1223fecaf0a176eec6061e4de7 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8160a87ed971516ee250c01fc2617e2051755a43c071de5b10e912cd77a09e26 +size 78980823 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt 
b/421m3b91b5/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..427d2f56b73bb9ce39abfaf5f647adfafdea1bb2 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffdd88401cb7cc5f3acf1cf9067fe1c3b674f8d9c763545a3f21513c0ea937e4 +size 78980887 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e279d9a23509809dfc99cf53b253bbd068908b2a --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4a4cf6403e64ab8e538f55fac5f4b83b89ca588b4bd82732514aa552dd221f +size 78980951 diff --git a/421m3b91b5/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/421m3b91b5/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..968d6091dd56ced76cd6aa3db3f657a278796e67 --- /dev/null +++ b/421m3b91b5/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a8f3dec3875b415be24f167019ac5f94eb2375529fee5dee01fdc179971b72a +size 78980887 diff --git a/421m3b91b5/global_step7508/layer_01-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_01-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a4ae433a686de445c2c47c7060f021ef205eef0 --- /dev/null +++ b/421m3b91b5/global_step7508/layer_01-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef88c43961a8715ab07161824795df956b265826ed2b9f2df6fe20672d4cb782 +size 134022403 diff --git 
a/421m3b91b5/global_step7508/layer_03-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_03-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3217eadfa7c25289b5b4f709a1e9040e7682840 --- /dev/null +++ b/421m3b91b5/global_step7508/layer_03-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd67ad0ec6d6f52d188119cd6cab6670b937787b7cdb0372484a4204c09e9cbc +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_04-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_04-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..56b61b6699dd3e573a85ea6f722681c5e116bc19 --- /dev/null +++ b/421m3b91b5/global_step7508/layer_04-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:031eff4c9a454b0e41d0c6dcd9915fa6d9b351ae0296045ffd7ef32f15581d04 +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_05-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_05-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..46953bd1c17cc081438ed26b08505a3573dcebed --- /dev/null +++ b/421m3b91b5/global_step7508/layer_05-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d35975a1f21666f3952e2abc4e09ec526b82fc758ac771206c5f9589f621109 +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_06-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_06-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f16d3a401d76b3ac1d756268551ceaa4a37bc484 --- /dev/null +++ b/421m3b91b5/global_step7508/layer_06-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:178d5e201664dfb0d549b16b860319629ac1200206e1dd1c3471879e84cc68bb +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_07-model_00-model_states.pt 
b/421m3b91b5/global_step7508/layer_07-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f365fc5b1cebfb8f337c2e4f25bf2450dfad5098 --- /dev/null +++ b/421m3b91b5/global_step7508/layer_07-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2246b312e8dbbc7bd0d4bb5611da926967adc15920626f5278be6a8653d72ff2 +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_08-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_08-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..49addc9c5b8d8765d3a5b893a9c1d593d21753ea --- /dev/null +++ b/421m3b91b5/global_step7508/layer_08-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e2370f6494815c63ec08628abc40b950e954bbf4cfc6ac6aeb71b04f834618b +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_09-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_09-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..399342a731e7061e4ef176ee32bfecd5ac5b5f29 --- /dev/null +++ b/421m3b91b5/global_step7508/layer_09-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f7f2e1a1ca097cb3cd38a99c87bafbde3cd64e6c44dff1047eb562d8f8b29e +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_10-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_10-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..86e741ee032186f8d4fb54d9c29043f4d7f6a0bf --- /dev/null +++ b/421m3b91b5/global_step7508/layer_10-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0adf31639d0fa70879d0b0b13ee4014d4a0e13d2150623704f7df49646aa9923 +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_11-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_11-model_00-model_states.pt new file mode 
100644 index 0000000000000000000000000000000000000000..dc3bffb08537c29966668572d25799afdb51f693 --- /dev/null +++ b/421m3b91b5/global_step7508/layer_11-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daf3acaef8d926911ae8eed6c642e50cca646dfe666940718910d2c4b62befb4 +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_12-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_12-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..29ae9222f16ce7b42efc3a8e6ae0d0050882adf3 --- /dev/null +++ b/421m3b91b5/global_step7508/layer_12-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13140eafd4421fe89b23514657dcc1834188302694c33c10235915e6385bf89b +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_13-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_13-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..361484691f297e1784daa17aa9a0d24f57742716 --- /dev/null +++ b/421m3b91b5/global_step7508/layer_13-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbe4110257d10b2e600467ae21d33fb2472c505b3fb081c27ec026c2938ec4bc +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_14-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_14-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..78fb6b95e42e187989aab847589a31a35575433b --- /dev/null +++ b/421m3b91b5/global_step7508/layer_14-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d263740b22182608097065d615a3e35234e1cdd1a886222594791befc165cb14 +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_15-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_15-model_00-model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..bf082dd84edb1cece420c1dc5590f52d3742094f --- /dev/null +++ b/421m3b91b5/global_step7508/layer_15-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:878b5abad56080a9188beb16e69ec5c3e5bb007d26e408fab6063502a3582812 +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_16-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_16-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a17cae4bc0e3527dad976973157a7ae333511613 --- /dev/null +++ b/421m3b91b5/global_step7508/layer_16-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4669ed4235aa29e90db44237d631c7adf029c840548142e24aa050a19079b8a4 +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_17-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_17-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c36093bcd00c1657587698e6501b0d6906f68dcb --- /dev/null +++ b/421m3b91b5/global_step7508/layer_17-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea65e747d12f072885f9f5a88bc5818f73dbe40cba68c29d120db0d2f291a13a +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_18-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_18-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e00c03e7cea44b0c8cb647c70758e3ba5eadfa4d --- /dev/null +++ b/421m3b91b5/global_step7508/layer_18-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9367fea88f7d733557617e62ac63d88da6c82753984d76c2c03aa7acb24b119f +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_19-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_19-model_00-model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..997a7ac1a38341bdbfd14e55862352d2ecdea796 --- /dev/null +++ b/421m3b91b5/global_step7508/layer_19-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1065a46438c093dd282c1a2875a5be0861950f9d4ce8b1ef99a605749e4446b6 +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_20-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_20-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3aa405578db932ab5e4f47633a9f15286c07898 --- /dev/null +++ b/421m3b91b5/global_step7508/layer_20-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf95a8cb61206ba0c9ede1164283566136fa909eb3ccd56be8cadde2e50a658b +size 39359235 diff --git a/421m3b91b5/global_step7508/layer_22-model_00-model_states.pt b/421m3b91b5/global_step7508/layer_22-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0abcc958f7c70daffacdd08e595ee8d2c0a203ec --- /dev/null +++ b/421m3b91b5/global_step7508/layer_22-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39cbdb69e0e21866757de65925b74c524967de0bc5db9f944a919db8d34f39c6 +size 6339 diff --git a/421m3b91b5/global_step7508/mp_rank_00_model_states.pt b/421m3b91b5/global_step7508/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4ecb5f9d828f82f06b01cc59cc80c40ce532754 --- /dev/null +++ b/421m3b91b5/global_step7508/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:778e46e253ac250cd1a0fd1150a21b87949e6fa313076366d2e34a3757546534 +size 37747 diff --git a/421m3b91b5/logs/2820893.err b/421m3b91b5/logs/2820893.err new file mode 100644 index 0000000000000000000000000000000000000000..99d0c22eb8c5463881425cbb9b2433cb9688e7f0 --- /dev/null +++ b/421m3b91b5/logs/2820893.err @@ -0,0 +1,1120 @@ +3: 2023-02-09 
22:47:59.830217: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:47:59.830222: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:47:59.830225: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:47:59.835430: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:47:59.847424: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+1: 2023-02-09 22:47:59.847454: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:47:59.847458: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:47:59.847482: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:47:59.847480: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:47:59.848945: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-02-09 22:47:59.848962: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:47:59.848972: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:47:59.861585: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:47:59.861516: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:47:59.863808: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+1: 2023-02-09 22:47:59.888769: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:47:59.889094: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:47:59.889644: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:47:59.915112: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-09 22:47:59.916146: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+7: 2023-02-09 22:47:59.917487: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-09 22:47:59.940429: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:47:59.940747: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-09 22:47:59.963044: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:47:59.965347: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+5: 2023-02-09 22:47:59.965350: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:47:59.965352: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:47:59.965345: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:47:59.965345: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:47:59.965356: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+5: 2023-02-09 22:47:59.965342: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-09 22:47:59.965363: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:47:59.985212: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:47:59.985222: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:47:59.985220: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+2: 2023-02-09 22:47:59.985229: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:47:59.985224: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:47:59.985226: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:47:59.985229: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-09 22:47:59.985219: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+6: 2023-02-09 22:47:59.987790: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:47:59.987791: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:47:59.987802: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:47:59.987803: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:47:59.987815: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+6: 2023-02-09 22:47:59.987821: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:47:59.987826: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-09 22:47:59.987811: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:47:59.993806: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:47:59.993815: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+0: 2023-02-09 22:47:59.993821: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:47:59.993812: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:47:59.993833: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:47:59.993830: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-09 22:47:59.993835: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+0: 2023-02-09 22:47:59.993836: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:48:00.001830: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:48:00.001842: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:48:00.001845: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:48:00.001840: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+4: 2023-02-09 22:48:00.001850: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:48:00.001853: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:48:00.001832: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-09 22:48:00.001833: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+3: 2023-02-09 22:48:01.729707: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:01.729711: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:01.729703: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:01.729715: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:01.729716: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:01.729722: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:01.729715: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:01.729720: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:01.730165: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:48:01.730170: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:48:01.730174: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:48:01.730174: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:48:01.730177: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+3: 2023-02-09 22:48:01.730179: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:48:01.730182: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-09 22:48:01.730183: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:48:01.872956: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:01.872951: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:01.872957: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:01.872962: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:01.872962: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:01.872962: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:01.872964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:01.872959: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:01.873406: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:48:01.873408: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:48:01.873409: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:48:01.873413: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:48:01.873414: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+7: 2023-02-09 22:48:01.873418: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:48:01.873416: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-09 22:48:01.873421: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:48:01.964278: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:01.964275: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:01.964275: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:01.964283: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:01.964287: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:01.964282: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:01.964291: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:01.964281: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:01.964782: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:48:01.964780: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:48:01.964785: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:48:01.964787: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:48:01.964789: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+1: 2023-02-09 22:48:01.964791: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:48:01.964795: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-09 22:48:01.964793: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:48:02.365207: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:02.365237: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:02.365240: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:02.365287: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:02.365262: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:02.365274: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:02.365278: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:02.365264: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:02.365639: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:48:02.365670: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:48:02.365670: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:48:02.365683: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:48:02.365693: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+4: 2023-02-09 22:48:02.365700: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:48:02.365703: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-09 22:48:02.365712: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:48:02.367965: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:02.367975: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:02.367991: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:02.367998: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:02.368004: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:02.368007: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:02.368016: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:02.368022: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:02.368421: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:48:02.368426: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:48:02.368451: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:48:02.368451: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:48:02.368457: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-02-09 22:48:02.368469: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:48:02.368472: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-09 22:48:02.368474: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:48:02.476385: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:02.476379: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:02.476379: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:02.476386: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:02.476384: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:02.476389: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:02.476388: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:02.476389: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:02.477511: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:48:02.477515: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:48:02.477518: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:48:02.477520: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:48:02.477522: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+5: 2023-02-09 22:48:02.477524: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:48:02.477525: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:48:02.477527: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:48:02.490677: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:02.490684: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:02.490680: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:02.490687: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:02.490693: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:02.490702: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:02.490698: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:02.490690: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:02.491109: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:48:02.491108: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:48:02.491115: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:48:02.491117: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:48:02.491117: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+6: 2023-02-09 22:48:02.491119: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:48:02.491121: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-09 22:48:02.491128: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:48:02.548967: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:02.548964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object 
file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:02.548974: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:02.548979: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:02.548976: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:02.549007: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:02.548986: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:02.548974: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:02.549454: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:48:02.549456: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:48:02.549458: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:48:02.549460: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:48:02.549460: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+2: 2023-02-09 22:48:02.549469: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:48:02.549467: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-09 22:48:02.549470: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-09 22:48:13.752707: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.752715: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No 
such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.752723: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.752722: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.752729: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.752733: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.752735: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.752736: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.754530: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.754536: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.754540: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.754545: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: 2023-02-09 22:48:13.754541: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.754542: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.754541: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.754550: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: 2023-02-09 22:48:13.754545: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.754554: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:48:13.754561: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:48:13.754561: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:48:13.754561: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:48:13.754565: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-09 22:48:13.754605: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-09 22:48:13.754618: W 
tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:48:13.760117: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.760113: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-09 22:48:13.760137: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.760126: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-09 22:48:13.760133: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.760129: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-09 22:48:13.760144: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.760125: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-09 22:48:13.760146: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.760127: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-09 22:48:13.760151: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.760124: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-09 22:48:13.760159: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.760135: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: 2023-02-09 22:48:13.760156: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:13.760161: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:13.762234: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:13.762236: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:13.762237: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:13.762237: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:13.762237: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:13.762240: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:13.762238: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:13.762249: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+6: 2023-02-09 22:48:13.762247: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-09 22:48:13.762253: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:48:13.762256: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:48:13.762255: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:48:13.762257: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:48:13.762258: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:48:13.762262: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-09 22:48:13.762260: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+3: 2023-02-09 22:48:13.767778: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.767781: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.767786: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.767789: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.767786: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.767789: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.767796: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.767800: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.770054: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.770054: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.770052: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.770058: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.770059: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.770065: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.770070: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:48:13.770069: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:48:13.770069: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:48:13.770075: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:48:13.770076: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:48:13.770078: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:48:13.770212: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.770219: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-09 22:48:13.770226: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-09 22:48:13.770232: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+7: 2023-02-09 22:48:13.802558: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.802565: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.802569: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.802569: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.802564: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.802566: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.802572: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.802574: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.762116: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.762120: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.762122: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.762130: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: 2023-02-09 22:48:13.762126: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.762126: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.762126: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.762127: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.762134: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:48:13.762138: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: 2023-02-09 22:48:13.762129: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-09 22:48:13.762142: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:48:13.762144: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:48:13.762144: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:48:13.762146: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-09 22:48:13.762148: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:48:13.808342: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.808348: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.808349: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.808354: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.808353: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.808352: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.808354: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.808348: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.809158: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.809171: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.809182: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.809168: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.809177: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.809173: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.809189: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.809195: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.810291: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.810305: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+1: 2023-02-09 22:48:13.810296: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.810298: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.810296: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.810302: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.810303: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.810315: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:48:13.810317: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:48:13.810317: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:48:13.810319: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:48:13.810321: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:48:13.810344: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.810343: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-09 22:48:13.810358: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-09 22:48:13.810359: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+2: 2023-02-09 22:48:13.811071: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.811071: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.811074: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.811078: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.811085: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+2: 2023-02-09 22:48:13.811078: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.811082: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.811086: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:48:13.811088: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:48:13.811093: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:48:13.811095: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:48:13.811095: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:48:13.811263: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.811262: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-09 22:48:13.811277: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-09 22:48:13.811277: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+7: 2023-02-09 22:48:13.812085: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.812086: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.812089: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.812092: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.812090: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.812093: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.812099: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:48:13.812099: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+7: 2023-02-09 22:48:13.812097: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.812108: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:48:13.812109: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:48:13.812109: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:48:13.812110: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:48:13.812115: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-09 22:48:13.812232: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-09 22:48:13.812244: W 
tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:48:13.816843: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.816854: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.816849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.816857: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.816857: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.816858: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.816859: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.816862: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.818380: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.818384: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.818383: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.818388: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.818393: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+4: 2023-02-09 22:48:13.818389: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.818392: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.818403: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:48:13.818405: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:48:13.818406: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:48:13.818408: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:48:13.818409: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:48:13.818499: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.818501: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-09 22:48:13.818514: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-09 22:48:13.818515: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_upper_triang_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... 
(overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_upper_triang_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module fused_mix_prec_layer_norm_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module fused_mix_prec_layer_norm_cuda... +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +1: Successfully preprocessed all matching files. +1: Successfully preprocessed all matching files. +7: Successfully preprocessed all matching files. 
+7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +3: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +2: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Loading extension module utils... +0: Loading extension module utils... 
+0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... 
+0: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +7: +7: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +7: +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: No modifications detected for re-loaded extension module utils, skipping build step... 
+7: Loading extension module utils... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +1: +1: Loading extension module utils...Loading extension module utils... +1: +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... 
+0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +2: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+3: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: +4: +4: +5: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +5: Building extension module utils... +5: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +5: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +6: Loading extension module utils... +2: Loading extension module utils... +6: Loading extension module utils... +2: Loading extension module utils... +4: Loading extension module utils... +6: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +6: Loading extension module utils... 
+4: Loading extension module utils... +2: Loading extension module utils... +6: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +6: Loading extension module utils... +2: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +6: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +6: Loading extension module utils... +3: Loading extension module utils... +2: Loading extension module utils... +4: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +4: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +4: +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... 
+2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +3: +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... 
+3: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +3: +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... 
+6: +6: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings +0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/421m3b91b5/logs/2820893.out b/421m3b91b5/logs/2820893.out new file mode 100644 index 0000000000000000000000000000000000000000..d6e9ecfd918da05a87b6efbd0432b66b4828ed0f --- /dev/null +++ b/421m3b91b5/logs/2820893.out @@ -0,0 +1,6435 @@ +Model parameters: d_model 1280 ffw_size 5120 kv_size 128 n_heads 10 n_layers 18 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 1 --pipeline-model-parallel-size 1 --num-layers 18 --hidden-size 1280 --num-attention-heads 10 --kv-channels 128 --ffn-hidden-size 5120 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 4 --global-batch-size 256 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --clip-grad 1.0 --kill-switch-path kill-switch-421m3b91b5val --bf16 --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 --lr-warmup-samples 0 --clip-grad 1.0 --weight-decay 1e-1 --no-load-optim --reset-progress --override-lr-scheduler --log-interval 10 --save-interval 1000 --eval-interval 1 --eval-iters 100 --tensorboard-dir tensorboard_421m3b91b5val --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save checkpoints_421m3b91b5 --load checkpoints_421m3b91b5 --train-weighted-split-paths-path train3b9.txt --valid-weighted-split-paths-path 
val.txt --data-impl mmap --deepspeed --deepspeed_config ds_configs/2820893.json --zero-stage 0 +START 2820893: Thu 09 Feb 2023 10:47:30 PM EET +0: +0: +0: ======================= ROCm System Management Interface ======================= +0: ================================= Concise Info ================================= +0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +0: 0 44.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 2 48.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 3 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 4 42.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 6 47.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: ================================================================================ +0: ============================= End of ROCm SMI Log ============================== +5: +5: +5: ======================= ROCm System Management Interface ======================= +5: ================================= Concise Info ================================= +5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +5: 0 47.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 1 51.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 2 43.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 4 49.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 6 47.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 7 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: ================================================================================ +5: ============================= End of ROCm SMI Log ============================== +6: +6: +6: ======================= ROCm System Management Interface ======================= +6: ================================= Concise Info ================================= +6: GPU Temp AvgPwr SCLK MCLK Fan 
Perf PwrCap VRAM% GPU% +6: 0 43.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 2 47.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 4 44.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 6 45.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: ================================================================================ +6: ============================= End of ROCm SMI Log ============================== +2: +2: +2: ======================= ROCm System Management Interface ======================= +2: ================================= Concise Info ================================= +2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +2: 0 46.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 2 43.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 4 43.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 6 42.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: ================================================================================ +2: ============================= End of ROCm SMI Log ============================== +7: +7: +7: ======================= ROCm System Management Interface ======================= +7: ================================= Concise Info ================================= +7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +7: 0 47.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 2 43.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 4 47.0c 80.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 5 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 6 45.0c 88.0W 
800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: ================================================================================ +7: ============================= End of ROCm SMI Log ============================== +1: +1: +1: ======================= ROCm System Management Interface ======================= +1: ================================= Concise Info ================================= +1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +1: 0 47.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 2 40.0c 99.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 3 40.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 4 44.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 6 46.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: ================================================================================ +1: ============================= End of ROCm SMI Log ============================== +4: +4: +4: ======================= ROCm System Management Interface ======================= +4: ================================= Concise Info ================================= +4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +4: 0 47.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 1 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 2 42.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 3 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 4 44.0c 103.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 6 42.0c 97.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: ================================================================================ +4: ============================= End of ROCm SMI Log ============================== +3: +3: +3: ======================= ROCm System Management Interface ======================= +3: 
================================= Concise Info ================================= +3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +3: 0 47.0c 93.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 2 44.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 4 46.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 5 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 6 41.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 7 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: ================================================================================ +3: ============================= End of ROCm SMI Log ============================== +0: Launching on nid007038 (0/8), master nid007038 port 9999, GPUs 8, CUDA: True +5: Launching on nid007043 (5/8), master nid007038 port 9999, GPUs 8, CUDA: True +3: Launching on nid007041 (3/8), master nid007038 port 9999, GPUs 8, CUDA: True +7: Launching on nid007045 (7/8), master nid007038 port 9999, GPUs 8, CUDA: True +1: Launching on nid007039 (1/8), master nid007038 port 9999, GPUs 8, CUDA: True +6: Launching on nid007044 (6/8), master nid007038 port 9999, GPUs 8, CUDA: True +2: Launching on nid007040 (2/8), master nid007038 port 9999, GPUs 8, CUDA: True +4: Launching on nid007042 (4/8), master nid007038 port 9999, GPUs 8, CUDA: True +0: using world size: 64, data-parallel-size: 64, tensor-model-parallel size: 1, pipeline-model-parallel size: 1 +0: accumulate and all-reduce gradients in fp32 for bfloat16 data type. +0: using torch.bfloat16 for parameters ... +0: ------------------------ arguments ------------------------ +0: abort_on_unmet_fused_kernel_constraints ......... False +0: accumulate_allreduce_grads_in_fp32 .............. True +0: adam_beta1 ...................................... 0.9 +0: adam_beta2 ...................................... 0.999 +0: adam_eps ........................................ 
1e-08 +0: adlr_autoresume ................................. False +0: adlr_autoresume_interval ........................ 1000 +0: apply_query_key_layer_scaling ................... True +0: apply_residual_connection_post_layernorm ........ False +0: attention_dropout ............................... 0.1 +0: attention_softmax_in_fp32 ....................... False +0: bert_binary_head ................................ True +0: bert_load ....................................... None +0: bf16 ............................................ True +0: bias_dropout_fusion ............................. True +0: bias_gelu_fusion ................................ True +0: biencoder_projection_dim ........................ 0 +0: biencoder_shared_query_context_model ............ False +0: block_data_path ................................. None +0: checkpoint_activations .......................... False +0: checkpoint_in_cpu ............................... False +0: checkpoint_num_layers ........................... 1 +0: clip_grad ....................................... 1.0 +0: codecarbon_dir .................................. None +0: consumed_train_samples .......................... 0 +0: consumed_train_tokens ........................... 0 +0: consumed_valid_samples .......................... 0 +0: contigious_checkpointing ........................ False +0: cpu_optimizer ................................... False +0: cpu_torch_adam .................................. False +0: curriculum_learning ............................. False +0: data_impl ....................................... mmap +0: data_parallel_size .............................. 64 +0: data_path ....................................... None +0: dataloader_type ................................. single +0: DDP_impl ........................................ local +0: decoder_seq_length .............................. None +0: deepscale ....................................... 
False +0: deepscale_config ................................ None +0: deepspeed ....................................... True +0: deepspeed_activation_checkpointing .............. False +0: deepspeed_config ................................ ds_configs/2820893.json +0: deepspeed_mpi ................................... False +0: distribute_checkpointed_activations ............. False +0: distributed_backend ............................. nccl +0: embed_layernorm ................................. False +0: embedding_path .................................. None +0: encoder_seq_length .............................. 2048 +0: eod_mask_loss ................................... False +0: eval_interval ................................... 1 +0: eval_iters ...................................... 100 +0: eval_only ....................................... None +0: evidence_data_path .............................. None +0: exit_duration_in_mins ........................... None +0: exit_interval ................................... None +0: ffn_hidden_size ................................. 5120 +0: finetune ........................................ False +0: fp16 ............................................ False +0: fp16_lm_cross_entropy ........................... False +0: fp32_residual_connection ........................ False +0: gigaflos_no_embeds .............................. 0 +0: global_batch_size ............................... 256 +0: glu_activation .................................. None +0: hidden_dropout .................................. 0.1 +0: hidden_size ..................................... 1280 +0: hysteresis ...................................... 2 +0: ict_head_size ................................... None +0: ict_load ........................................ None +0: img_dim ......................................... 224 +0: indexer_batch_size .............................. 128 +0: indexer_log_interval ............................ 
1000 +0: inference ....................................... False +0: init_method_std ................................. 0.02 +0: init_method_xavier_uniform ...................... False +0: initial_loss_scale .............................. 4294967296 +0: kill_switch_path ................................ kill-switch-421m3b91b5val +0: kv_channels ..................................... 128 +0: layer_norm_fusion ............................... True +0: layernorm_epsilon ............................... 1e-05 +0: lazy_mpu_init ................................... None +0: load ............................................ checkpoints_421m3b91b5 +0: local_rank ...................................... None +0: log_batch_size_to_tensorboard ................... True +0: log_interval .................................... 10 +0: log_learning_rate_to_tensorboard ................ True +0: log_level ....................................... None +0: log_level_replica ............................... None +0: log_loss_scale_to_tensorboard ................... True +0: log_num_zeros_in_grad ........................... False +0: log_params_norm ................................. False +0: log_path ........................................ None +0: log_timers_to_tensorboard ....................... True +0: log_validation_ppl_to_tensorboard ............... True +0: loss_on_targets_only ............................ False +0: loss_scale ...................................... None +0: loss_scale_window ............................... 1000 +0: lr .............................................. 0.0002 +0: lr_decay_iters .................................. None +0: lr_decay_samples ................................ 1 +0: lr_decay_style .................................. cosine +0: lr_decay_tokens ................................. None +0: lr_warmup_fraction .............................. None +0: lr_warmup_iters ................................. 0 +0: lr_warmup_samples ............................... 
0 +0: make_vocab_size_divisible_by .................... 128 +0: mask_prob ....................................... 0.15 +0: masked_softmax_fusion ........................... True +0: max_position_embeddings ......................... 2048 +0: mean_noise_span_length .......................... None +0: memory_centric_tiled_linear ..................... False +0: merge_file ...................................... gpt2/merges.txt +0: micro_batch_size ................................ 4 +0: min_loss_scale .................................. 1.0 +0: min_lr .......................................... 2e-05 +0: mmap_warmup ..................................... False +0: no_load_optim ................................... True +0: no_load_rng ..................................... None +0: no_save_optim ................................... None +0: no_save_rng ..................................... None +0: noise_density ................................... None +0: num_attention_heads ............................. 10 +0: num_channels .................................... 3 +0: num_classes ..................................... 1000 +0: num_layers ...................................... 18 +0: num_layers_per_virtual_pipeline_stage ........... None +0: num_workers ..................................... 2 +0: onnx_safe ....................................... None +0: openai_gelu ..................................... False +0: optimizer ....................................... adam +0: optimizer_fusion ................................ True +0: override_lr_scheduler ........................... True +0: pad_vocab_size_to ............................... None +0: params_dtype .................................... torch.bfloat16 +0: partition_activations ........................... False +0: patch_dim ....................................... 16 +0: pipeline_model_parallel_size .................... 1 +0: position_embedding_type ......................... 
PositionEmbeddingType.absolute +0: pp_partition_method ............................. None +0: profile_backward ................................ False +0: query_in_block_prob ............................. 0.1 +0: rampup_batch_size ............................... None +0: rank ............................................ 0 +0: remote_device ................................... none +0: reset_attention_mask ............................ False +0: reset_position_ids .............................. False +0: reset_progress .................................. True +0: retriever_report_topk_accuracies ................ [] +0: retriever_score_scaling ......................... False +0: retriever_seq_length ............................ 256 +0: reweight_loss_based_on_position_frequency ....... False +0: sample_rate ..................................... 1.0 +0: save ............................................ checkpoints_421m3b91b5 +0: save_interval ................................... 1000 +0: scatter_gather_tensors_in_pipeline .............. True +0: scattered_embeddings ............................ False +0: seed ............................................ 1234 +0: seq_length ...................................... 2048 +0: sgd_momentum .................................... 0.9 +0: short_seq_prob .................................. 0.1 +0: skip_train_iteration_range ...................... None +0: split ........................................... None +0: split_transformers .............................. False +0: sync_tp_duplicated_parameters ................... False +0: synchronize_each_layer .......................... False +0: tensor_model_parallel_size ...................... 1 +0: tensorboard_dir ................................. tensorboard_421m3b91b5val +0: tensorboard_log_interval ........................ 1 +0: tensorboard_queue_size .......................... 5 +0: test_weighted_split_paths ....................... 
None +0: test_weighted_split_paths_path .................. None +0: tile_factor ..................................... 1 +0: titles_data_path ................................ None +0: tokenizer_name_or_path .......................... None +0: tokenizer_type .................................. GPT2BPETokenizer +0: train_iters ..................................... None +0: train_samples ................................... 1 +0: train_tokens .................................... None +0: train_weighted_split_names ...................... ['train'] +0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_3B9_text_document']] +0: train_weighted_split_paths_path ................. None +0: train_weighted_split_splits ..................... [['0:1']] +0: train_weighted_split_weights .................... [['1.0']] +0: universal_checkpoint ............................ False +0: use_bnb_optimizer ............................... False +0: use_checkpoint_lr_scheduler ..................... False +0: use_contiguous_buffers_in_ddp ................... True +0: use_cpu_initialization .......................... None +0: use_one_sent_docs ............................... False +0: use_pin_memory .................................. False +0: valid_num_workers ............................... 2 +0: valid_weighted_split_names ...................... ['validation'] +0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] +0: valid_weighted_split_paths_path ................. None +0: valid_weighted_split_splits ..................... [['0:1']] +0: valid_weighted_split_weights .................... [['1.0']] +0: virtual_pipeline_model_parallel_size ............ None +0: vocab_extra_ids ................................. 0 +0: vocab_file ...................................... gpt2/vocab.json +0: weight_decay .................................... 
0.1 +0: world_size ...................................... 64 +0: zero_allgather_bucket_size ...................... 0.0 +0: zero_contigious_gradients ....................... False +0: zero_reduce_bucket_size ......................... 0.0 +0: zero_reduce_scatter ............................. False +0: zero_stage ...................................... 0 +0: -------------------- end of arguments --------------------- +0: setting number of micro-batches to constant 1 +0: > building GPT2BPETokenizer tokenizer ... +7: > setting tensorboard ... +0: > padded vocab (size: 50257) with 47 dummy tokens (new size: 50304) +0: DeepSpeed general environment info: +0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] +0: torch version .................... 1.13.0+rocm5.2 +0: torch cuda version ............... None +0: torch hip version ................ 5.2.21151-afdc89f8 +0: nvcc version ..................... None +0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] +0: deepspeed info ................... 0.7.5, unknown, unknown +0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 +0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** +0: > initializing torch distributed ... +0: [2023-02-09 22:48:56,852] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +0: > initializing tensor model parallel with size 1 +0: > initializing pipeline model parallel with size 1 +0: > setting random seeds to 1234 ... +0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 +0: > compiling dataset index builder ... 
+0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: make: Nothing to be done for 'default'. +0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: >>> done with dataset index builder. Compilation time: 0.113 seconds +0: > compiling and loading fused kernels ... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.hip [skipped, 
already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 87 +0: [1/1] c++ scaled_upper_triang_masked_softmax_hip.cuda.o scaled_upper_triang_masked_softmax_hip.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o scaled_upper_triang_masked_softmax_cuda.so +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 63 +0: [1/1] c++ scaled_masked_softmax_hip.o scaled_masked_softmax_hip.cuda.o -shared 
-L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o scaled_masked_softmax_cuda.so +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 67 +0: [1/1] c++ layer_norm_hip_kernel.cuda.o layer_norm_cuda.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o fused_mix_prec_layer_norm_cuda.so +0: >>> done with compiling and loading fused kernels. Compilation time: 33.446 seconds +0: time to initialize megatron (seconds): 93.168 +0: [after megatron is initialized] datetime: 2023-02-09 22:49:33 +0: building GPT model ... +0: [2023-02-09 22:49:33,303] [INFO] [utils.py:827:see_memory_usage] Before Building Model +0: [2023-02-09 22:49:33,304] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB +0: [2023-02-09 22:49:33,304] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.16 GB, percent = 6.0% +0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None +0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=1, model=0): 1, ProcessCoord(pipe=0, data=2, model=0): 2, ProcessCoord(pipe=0, data=3, model=0): 3, ProcessCoord(pipe=0, data=4, model=0): 4, ProcessCoord(pipe=0, data=5, model=0): 5, ProcessCoord(pipe=0, data=6, model=0): 6, ProcessCoord(pipe=0, data=7, model=0): 7, ProcessCoord(pipe=0, data=8, model=0): 8, ProcessCoord(pipe=0, data=9, model=0): 9, ProcessCoord(pipe=0, data=10, model=0): 10, ProcessCoord(pipe=0, data=11, model=0): 11, ProcessCoord(pipe=0, data=12, model=0): 12, ProcessCoord(pipe=0, data=13, model=0): 13, ProcessCoord(pipe=0, data=14, model=0): 14, ProcessCoord(pipe=0, data=15, model=0): 15, ProcessCoord(pipe=0, data=16, 
model=0): 16, ProcessCoord(pipe=0, data=17, model=0): 17, ProcessCoord(pipe=0, data=18, model=0): 18, ProcessCoord(pipe=0, data=19, model=0): 19, ProcessCoord(pipe=0, data=20, model=0): 20, ProcessCoord(pipe=0, data=21, model=0): 21, ProcessCoord(pipe=0, data=22, model=0): 22, ProcessCoord(pi +0: pe=0, data=23, model=0): 23, ProcessCoord(pipe=0, data=24, model=0): 24, ProcessCoord(pipe=0, data=25, model=0): 25, ProcessCoord(pipe=0, data=26, model=0): 26, ProcessCoord(pipe=0, data=27, model=0): 27, ProcessCoord(pipe=0, data=28, model=0): 28, ProcessCoord(pipe=0, data=29, model=0): 29, ProcessCoord(pipe=0, data=30, model=0): 30, ProcessCoord(pipe=0, data=31, model=0): 31, ProcessCoord(pipe=0, data=32, model=0): 32, ProcessCoord(pipe=0, data=33, model=0): 33, ProcessCoord(pipe=0, data=34, model=0): 34, ProcessCoord(pipe=0, data=35, model=0): 35, ProcessCoord(pipe=0, data=36, model=0): 36, ProcessCoord(pipe=0, data=37, model=0): 37, ProcessCoord(pipe=0, data=38, model=0): 38, ProcessCoord(pipe=0, data=39, model=0): 39, ProcessCoord(pipe=0, data=40, model=0): 40, ProcessCoord(pipe=0, data=41, model=0): 41, ProcessCoord(pipe=0, data=42, model=0): 42, ProcessCoord(pipe=0, data=43, model=0): 43, ProcessCoord(pipe=0, data=44, model=0): 44, ProcessCoord(pipe=0, data=45, model=0): 45, ProcessCoord(pipe=0, data=4 +0: 6, model=0): 46, ProcessCoord(pipe=0, data=47, model=0): 47, ProcessCoord(pipe=0, data=48, model=0): 48, ProcessCoord(pipe=0, data=49, model=0): 49, ProcessCoord(pipe=0, data=50, model=0): 50, ProcessCoord(pipe=0, data=51, model=0): 51, ProcessCoord(pipe=0, data=52, model=0): 52, ProcessCoord(pipe=0, data=53, model=0): 53, ProcessCoord(pipe=0, data=54, model=0): 54, ProcessCoord(pipe=0, data=55, model=0): 55, ProcessCoord(pipe=0, data=56, model=0): 56, ProcessCoord(pipe=0, data=57, model=0): 57, ProcessCoord(pipe=0, data=58, model=0): 58, ProcessCoord(pipe=0, data=59, model=0): 59, ProcessCoord(pipe=0, data=60, model=0): 60, ProcessCoord(pipe=0, data=61, model=0): 
61, ProcessCoord(pipe=0, data=62, model=0): 62, ProcessCoord(pipe=0, data=63, model=0): 63} +0: [2023-02-09 22:49:35,324] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer +0: stage=0 layers=25 +0: 0: _to_float16 +0: 1: EmbeddingPipe +0: 2: +0: 3: ParallelTransformerLayerPipe +0: 4: ParallelTransformerLayerPipe +0: 5: ParallelTransformerLayerPipe +0: 6: ParallelTransformerLayerPipe +0: 7: ParallelTransformerLayerPipe +0: 8: ParallelTransformerLayerPipe +0: 9: ParallelTransformerLayerPipe +0: 10: ParallelTransformerLayerPipe +0: 11: ParallelTransformerLayerPipe +0: 12: ParallelTransformerLayerPipe +0: 13: ParallelTransformerLayerPipe +0: 14: ParallelTransformerLayerPipe +0: 15: ParallelTransformerLayerPipe +0: 16: ParallelTransformerLayerPipe +0: 17: ParallelTransformerLayerPipe +0: 18: ParallelTransformerLayerPipe +0: 19: ParallelTransformerLayerPipe +0: 20: ParallelTransformerLayerPipe +0: 21: undo +0: 22: MixedFusedLayerNorm +0: 23: EmbeddingPipe +0: 24: float16_to_fp32 +0: loss: CrossEntropy +0: [2023-02-09 22:49:35,538] [INFO] [utils.py:827:see_memory_usage] After Building Model +0: [2023-02-09 22:49:35,538] [INFO] [utils.py:828:see_memory_usage] MA 0.79 GB Max_MA 0.79 GB CA 0.86 GB Max_CA 1 GB +0: [2023-02-09 22:49:35,538] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.19 GB, percent = 6.0% +0: setting training iterations to 0 +0: > learning rate decay style: cosine +0: DeepSpeed is enabled. 
+0: [2023-02-09 22:49:35,540] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +0: [2023-02-09 22:49:48,634] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +0: [2023-02-09 22:49:48,635] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +0: [2023-02-09 22:49:48,635] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +0: [2023-02-09 22:49:48,641] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +0: [2023-02-09 22:49:48,641] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +0: [2023-02-09 22:49:48,761] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer +0: [2023-02-09 22:49:48,762] [INFO] [utils.py:828:see_memory_usage] MA 0.78 GB Max_MA 0.79 GB CA 0.88 GB Max_CA 1 GB +0: [2023-02-09 22:49:48,762] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.86 GB, percent = 6.1% +0: Time to load utils op: 0.10892724990844727 seconds +0: Time to load utils op: 0.10889124870300293 secondsTime to load utils op: 0.10916900634765625 seconds +0: +0: Time to load utils op: 0.10889005661010742 seconds +0: Time to load utils op: 0.10914254188537598 seconds +0: Time to load utils op: 0.1091618537902832 seconds +1: Time to load utils op: 0.10932350158691406 seconds +1: Time to load utils op: 0.10940384864807129 seconds +1: Time to load utils op: 0.10981631278991699 seconds +1: Time to load utils op: 0.10965394973754883 seconds +1: Time to load utils op: 0.10951423645019531 secondsTime to load utils op: 0.11039257049560547 seconds +1: +1: Time to load utils op: 0.11095523834228516 secondsTime to load utils op: 0.11041545867919922 seconds +1: +7: Time to load utils op: 0.10938549041748047 seconds +7: Time to load utils op: 0.10995960235595703 secondsTime to load utils op: 0.10933899879455566 seconds +7: +7: Time to load utils op: 
0.10887455940246582 seconds +7: Time to load utils op: 0.10870838165283203 seconds +7: Time to load utils op: 0.10959458351135254 secondsTime to load utils op: 0.10885858535766602 seconds +7: Time to load utils op: 0.1097879409790039 seconds +7: +0: Time to load utils op: 0.0005109310150146484 seconds +0: Time to load utils op: 0.0005035400390625 seconds +0: Time to load utils op: 0.0005304813385009766 seconds +0: Time to load utils op: 0.00042724609375 seconds +0: Time to load utils op: 0.0005059242248535156 seconds +0: Time to load utils op: 0.0005815029144287109 seconds +7: Time to load utils op: 0.0009849071502685547 seconds +7: Time to load utils op: 0.0011098384857177734 seconds +7: Time to load utils op: 0.0011603832244873047 secondsTime to load utils op: 0.0010447502136230469 seconds +7: +7: Time to load utils op: 0.001180887222290039 seconds +7: Time to load utils op: 0.0011677742004394531 seconds +7: Time to load utils op: 0.0011670589447021484 seconds +7: Time to load utils op: 0.00034427642822265625 seconds +1: Time to load utils op: 0.0010943412780761719 seconds +1: Time to load utils op: 0.0011806488037109375 seconds +1: Time to load utils op: 0.0011785030364990234 seconds +1: Time to load utils op: 0.0011878013610839844 seconds +1: Time to load utils op: 0.0013942718505859375 seconds +1: Time to load utils op: 0.0012774467468261719 seconds +1: Time to load utils op: 0.0013284683227539062 seconds +1: Time to load utils op: 0.0013697147369384766 seconds +0: Time to load utils op: 0.4052605628967285 seconds +0: Time to load utils op: 0.30254507064819336 seconds +0: Time to load utils op: 0.0005197525024414062 seconds +0: [2023-02-09 22:49:49,186] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 +0: [2023-02-09 22:49:49,186] [INFO] [utils.py:828:see_memory_usage] MA 0.78 GB Max_MA 0.78 GB CA 0.88 GB Max_CA 1 GB +0: [2023-02-09 22:49:49,186] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.01 GB, percent = 6.2% +0: 
[2023-02-09 22:49:49,304] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 +0: [2023-02-09 22:49:49,304] [INFO] [utils.py:828:see_memory_usage] MA 1.68 GB Max_MA 1.68 GB CA 2.21 GB Max_CA 2 GB +0: [2023-02-09 22:49:49,304] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.01 GB, percent = 6.2% +0: [2023-02-09 22:49:49,409] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 +0: [2023-02-09 22:49:49,410] [INFO] [utils.py:828:see_memory_usage] MA 1.68 GB Max_MA 1.68 GB CA 2.21 GB Max_CA 2 GB +0: [2023-02-09 22:49:49,410] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.01 GB, percent = 6.2% +0: [2023-02-09 22:49:49,516] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 +0: [2023-02-09 22:49:49,516] [INFO] [utils.py:828:see_memory_usage] MA 2.38 GB Max_MA 2.38 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-09 22:49:49,516] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.01 GB, percent = 6.2% +0: [2023-02-09 22:49:49,619] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 +0: [2023-02-09 22:49:49,620] [INFO] [utils.py:828:see_memory_usage] MA 2.38 GB Max_MA 2.38 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-09 22:49:49,620] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.01 GB, percent = 6.2% +0: [2023-02-09 22:49:49,726] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 +0: [2023-02-09 22:49:49,727] [INFO] [utils.py:828:see_memory_usage] MA 2.38 GB Max_MA 2.38 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-09 22:49:49,727] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.01 GB, percent = 6.2% +0: [2023-02-09 22:49:49,830] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer +0: [2023-02-09 22:49:49,831] [INFO] [utils.py:828:see_memory_usage] MA 2.38 GB Max_MA 2.38 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-09 22:49:49,831] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.01 GB, 
percent = 6.2% +0: [2023-02-09 22:49:49,941] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer +0: [2023-02-09 22:49:49,941] [INFO] [utils.py:828:see_memory_usage] MA 2.43 GB Max_MA 2.43 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-09 22:49:49,941] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.01 GB, percent = 6.2% +0: [2023-02-09 22:49:50,047] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer +0: [2023-02-09 22:49:50,048] [INFO] [utils.py:828:see_memory_usage] MA 2.43 GB Max_MA 2.43 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-09 22:49:50,048] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.01 GB, percent = 6.2% +0: [2023-02-09 22:49:50,048] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +0: [2023-02-09 22:49:50,048] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler +0: [2023-02-09 22:49:50,048] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +0: [2023-02-09 22:49:50,048] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +0: [2023-02-09 22:49:50,048] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] activation_checkpointing_config { +0: "partition_activations": false, +0: "contiguous_memory_optimization": false, +0: "cpu_checkpointing": false, +0: "number_checkpoints": null, +0: "synchronize_checkpoint_boundary": false, +0: "profile": false +0: } +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] amp_enabled .................. False +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] amp_params ................... 
False +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] autotuning_config ............ { +0: "enabled": false, +0: "start_step": null, +0: "end_step": null, +0: "metric_path": null, +0: "arg_mappings": null, +0: "metric": "throughput", +0: "model_info": null, +0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", +0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", +0: "overwrite": true, +0: "fast": true, +0: "start_profile_step": 3, +0: "end_profile_step": 5, +0: "tuner_type": "gridsearch", +0: "tuner_early_stopping": 5, +0: "tuner_num_trials": 50, +0: "model_info_path": null, +0: "mp_size": 1, +0: "max_train_batch_size": null, +0: "min_train_batch_size": 1, +0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, +0: "min_train_micro_batch_size_per_gpu": 1, +0: "num_tuning_micro_batch_sizes": 3 +0: } +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] bfloat16_enabled ............. True +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] comms_config ................. +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] communication_data_type ...... None +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] compression_config ........... 
{'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa +0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] curriculum_enabled ........... False +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] curriculum_params ............ False +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] dataloader_drop_last ......... False +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] disable_allgather ............ False +0: [2023-02-09 22:49:50,049] [INFO] [config.py:1011:print] dump_state ................... False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ 
bert.encoder.layer +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] elasticity_enabled ........... False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] flops_profiler_config ........ { +0: "enabled": false, +0: "profile_step": 1, +0: "module_depth": -1, +0: "top_modules": 1, +0: "detailed": true, +0: "output_file": null +0: } +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] fp16_auto_cast ............... None +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] fp16_enabled ................. False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] global_rank .................. 0 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 1 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] load_universal_checkpoint .... False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] loss_scale ................... 1.0 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] memory_breakdown ............. False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] monitor_config ............... 
+0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] nebula_config ................ { +0: "enabled": false, +0: "persistent_storage_path": null, +0: "persistent_time_interval": 100, +0: "num_of_version_in_retention": 2, +0: "enable_nebula_load": true, +0: "load_path": null +0: } +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] optimizer_name ............... None +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] optimizer_params ............. None +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] pld_enabled .................. False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] pld_params ................... False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] prescale_gradients ........... False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] scheduler_name ............... None +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] scheduler_params ............. None +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] sparse_attention ............. None +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] steps_per_print .............. 2000 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] train_batch_size ............. 256 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 4 +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] use_node_local_storage ....... False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] wall_clock_breakdown ......... 
False +0: [2023-02-09 22:49:50,050] [INFO] [config.py:1011:print] world_size ................... 64 +0: [2023-02-09 22:49:50,051] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False +0: [2023-02-09 22:49:50,051] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +0: [2023-02-09 22:49:50,051] [INFO] [config.py:1011:print] zero_enabled ................. False +0: [2023-02-09 22:49:50,051] [INFO] [config.py:1011:print] zero_optimization_stage ...... 0 +0: [2023-02-09 22:49:50,051] [INFO] [config.py:996:print_user_config] json = { +0: "train_micro_batch_size_per_gpu": 4, +0: "train_batch_size": 256, +0: "gradient_clipping": 1.0, +0: "zero_optimization": { +0: "stage": 0 +0: }, +0: "bf16": { +0: "enabled": true +0: }, +0: "steps_per_print": 2.000000e+03, +0: "wall_clock_breakdown": false +0: } +0: Time to load utils op: 0.0004191398620605469 seconds +0: [2023-02-09 22:49:50,051] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=1 micro_batch_size=4 +0: [2023-02-09 22:49:50,123] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=25 [0, 25) STAGE_PARAMS=421207040 (421.207M) TOTAL_PARAMS=421207040 (421.207M) UNIQUE_PARAMS=421207040 (421.207M) +5: ninja: no work to do. 
+5: Time to load utils op: 0.16995835304260254 seconds +5: Time to load utils op: 0.0007870197296142578 seconds +5: Time to load utils op: 0.40537023544311523 seconds +5: Time to load utils op: 0.40568041801452637 seconds +5: Time to load utils op: 0.4057271480560303 seconds +5: Time to load utils op: 0.4057602882385254 seconds +5: Time to load utils op: 0.4058353900909424 seconds +5: Time to load utils op: 0.40590453147888184 seconds +5: Time to load utils op: 0.40593552589416504 seconds +2: Time to load utils op: 0.4148240089416504 secondsTime to load utils op: 0.414825439453125 seconds +2: +6: Time to load utils op: 0.4150991439819336 seconds +6: Time to load utils op: 0.4151115417480469 seconds +2: Time to load utils op: 0.414872407913208 seconds +2: Time to load utils op: 0.41484999656677246 secondsTime to load utils op: 0.414858341217041 seconds +2: +2: Time to load utils op: 0.414888858795166 seconds +2: Time to load utils op: 0.4148726463317871 secondsTime to load utils op: 0.4148738384246826 seconds +2: +4: Time to load utils op: 0.4147648811340332 seconds +4: Time to load utils op: 0.4147462844848633 seconds +6: Time to load utils op: 0.41513586044311523 seconds +6: Time to load utils op: 0.41518163681030273 secondsTime to load utils op: 0.41516947746276855 seconds +6: +6: Time to load utils op: 0.4151780605316162 seconds +6: Time to load utils op: 0.41521120071411133 seconds +4: Time to load utils op: 0.41473960876464844 seconds +6: Time to load utils op: 0.41517210006713867 seconds +4: Time to load utils op: 0.41478419303894043 seconds +4: Time to load utils op: 0.4147968292236328 seconds +4: Time to load utils op: 0.4147918224334717 secondsTime to load utils op: 0.4147956371307373 seconds +4: +4: Time to load utils op: 0.4147953987121582 seconds +3: Time to load utils op: 0.4154782295227051 seconds +3: Time to load utils op: 0.4154996871948242 seconds +3: Time to load utils op: 0.4155278205871582 seconds +3: Time to load utils op: 0.4155237674713135 
secondsTime to load utils op: 0.41551995277404785 seconds +3: +3: Time to load utils op: 0.4155290126800537 seconds +3: Time to load utils op: 0.41553425788879395 secondsTime to load utils op: 0.4155263900756836 seconds +3: +5: Time to load utils op: 0.0006990432739257812 seconds +5: Time to load utils op: 0.00052642822265625 seconds +5: Time to load utils op: 0.00035262107849121094 seconds +5: Time to load utils op: 0.0003383159637451172 seconds +5: Time to load utils op: 0.0003769397735595703 seconds +5: Time to load utils op: 0.0003681182861328125 seconds +5: Time to load utils op: 0.0003731250762939453 seconds +4: Time to load utils op: 0.0006620883941650391 seconds +2: Time to load utils op: 0.0007312297821044922 seconds +4: Time to load utils op: 0.0009226799011230469 seconds +4: Time to load utils op: 0.0010151863098144531 seconds +4: Time to load utils op: 0.0009622573852539062 seconds +4: Time to load utils op: 0.0011942386627197266 seconds +4: Time to load utils op: 0.00119781494140625 seconds +4: Time to load utils op: 0.0011625289916992188 seconds +4: Time to load utils op: 0.001283407211303711 seconds +2: Time to load utils op: 0.001127004623413086 seconds +2: Time to load utils op: 0.0011627674102783203 seconds +2: Time to load utils op: 0.001241445541381836 seconds +2: Time to load utils op: 0.0011987686157226562 seconds +2: Time to load utils op: 0.0012650489807128906 seconds +2: Time to load utils op: 0.0011844635009765625 seconds +2: Time to load utils op: 0.0012478828430175781 seconds +3: Time to load utils op: 0.000865936279296875 seconds +3: Time to load utils op: 0.0009312629699707031 seconds +3: Time to load utils op: 0.0010013580322265625 seconds +3: Time to load utils op: 0.0010693073272705078 seconds +3: Time to load utils op: 0.0011124610900878906 seconds +3: Time to load utils op: 0.0010852813720703125 seconds +3: Time to load utils op: 0.0011510848999023438 seconds +3: Time to load utils op: 0.0011811256408691406 seconds +6: Time to 
load utils op: 0.0009987354278564453 seconds +6: Time to load utils op: 0.0010449886322021484 seconds +6: Time to load utils op: 0.0012159347534179688 seconds +6: Time to load utils op: 0.0013120174407958984 seconds +6: Time to load utils op: 0.0013191699981689453 seconds +6: Time to load utils op: 0.0013136863708496094 seconds +6: Time to load utils op: 0.0013363361358642578 seconds +6: Time to load utils op: 0.0013294219970703125 seconds +6: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+2: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+0: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+2: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+4: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+3: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+5: [2023-02-09 22:49:51,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+0: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+7: [2023-02-09 22:49:51,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. 
+0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+0: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:51,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+0: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:51,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. 
+0: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. 
+2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. 
+5: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-09 22:49:51,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt... 
+6: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. 
+3: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. 
+6: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. 
+6: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+3: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+3: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:51,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:51,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:51,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+3: [2023-02-09 22:49:51,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:51,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:51,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:51,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:51,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:51,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:51,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:51,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:51,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:51,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:51,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. 
+5: [2023-02-09 22:49:51,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:51,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:51,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:51,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:52,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:52,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:52,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:52,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:52,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:52,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. 
+1: [2023-02-09 22:49:52,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:52,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:52,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:52,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:52,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+1: [2023-02-09 22:49:52,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:52,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:52,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:52,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:52,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. 
+0: [2023-02-09 22:49:52,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. 
+6: [2023-02-09 22:49:52,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:49:52,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:52,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:49:52,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+5: [2023-02-09 22:49:52,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:49:52,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:49:52,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:49:52,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-09 22:49:52,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:52,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:49:52,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:49:52,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:49:52,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:49:52,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. 
+4: [2023-02-09 22:49:52,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:49:52,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:52,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:52,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:52,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:52,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:52,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:52,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-09 22:49:52,053] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:49:52,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:49:52,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:49:52,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:49:52,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:49:52,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:49:52,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:49:52,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:49:52,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:49:52,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:49:52,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. 
+7: [2023-02-09 22:49:52,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:52,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:49:52,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:52,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:49:52,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:49:52,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:52,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,055] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:49:52,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:52,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. 
+6: [2023-02-09 22:49:52,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-09 22:49:52,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:52,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-09 22:49:52,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:52,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:52,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:52,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:52,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:52,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+4: [2023-02-09 22:49:52,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:52,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:52,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:52,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:52,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:52,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-09 22:49:52,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:49:52,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:49:52,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:49:52,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:49:52,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:49:52,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:49:52,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-09 22:49:52,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:52,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:52,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:52,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:52,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-09 22:49:52,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... 
+7: [2023-02-09 22:49:52,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-09 22:49:52,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:52,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:52,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-09 22:49:52,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... 
+1: [2023-02-09 22:49:52,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,083] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,090] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,091] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,095] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,096] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,097] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. 
+1: [2023-02-09 22:49:52,097] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:49:52,101] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. 
+1: [2023-02-09 22:49:52,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,102] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,104] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:49:52,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:49:52,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:49:52,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. 
+4: [2023-02-09 22:49:52,105] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,106] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:49:52,107] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-09 22:49:52,107] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,109] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. 
+4: [2023-02-09 22:49:52,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:49:52,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:49:52,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:49:52,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:49:52,110] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. 
+2: [2023-02-09 22:49:52,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:49:52,114] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:49:52,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,115] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-09 22:49:52,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-09 22:49:52,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... 
+3: [2023-02-09 22:49:52,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-09 22:49:52,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-09 22:49:52,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-09 22:49:52,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... 
+7: [2023-02-09 22:49:52,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-09 22:49:52,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. 
+5: [2023-02-09 22:49:52,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... 
+5: [2023-02-09 22:49:52,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,218] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,220] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. 
+1: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. 
+7: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. 
+2: [2023-02-09 22:49:52,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-09 22:49:52,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... 
+4: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,229] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+7: [2023-02-09 22:49:52,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,230] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+6: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-09 22:49:52,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... 
+4: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. 
+0: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... 
+5: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... 
+0: [2023-02-09 22:49:52,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-09 22:49:52,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-09 22:49:52,234] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-09 22:49:52,236] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,237] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,239] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. 
+2: [2023-02-09 22:49:52,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,240] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,241] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,243] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-09 22:49:52,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. 
+0: [2023-02-09 22:49:52,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:49:52,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:49:52,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. 
+4: [2023-02-09 22:49:52,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:49:52,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-09 22:49:52,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:49:52,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:49:52,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. 
+4: [2023-02-09 22:49:52,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,252] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-09 22:49:52,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:49:52,254] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:49:52,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-09 22:49:52,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+0: [2023-02-09 22:49:52,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-09 22:49:52,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,255] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-09 22:49:52,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,256] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-09 22:49:52,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+3: [2023-02-09 22:49:52,257] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-09 22:49:52,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,258] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,259] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:49:52,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+7: [2023-02-09 22:49:52,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:49:52,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:49:52,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:49:52,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:49:52,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,263] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+7: [2023-02-09 22:49:52,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,264] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,265] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,266] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+0: [2023-02-09 22:49:52,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,267] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:49:52,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,425] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:49:52,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+5: [2023-02-09 22:49:52,425] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:49:52,426] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+3: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+0: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,430] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+3: [2023-02-09 22:49:52,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,431] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,431] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+1: [2023-02-09 22:49:52,432] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,432] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+7: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:49:52,433] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+7: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+7: [2023-02-09 22:49:52,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:49:52,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,434] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,435] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,435] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+6: [2023-02-09 22:49:52,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,436] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,436] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... 
+6: [2023-02-09 22:49:52,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-09 22:49:52,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-09 22:49:52,437] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-09 22:49:52,437] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,438] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-09 22:49:52,438] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,439] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:49:52,441] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+5: [2023-02-09 22:49:52,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:49:52,442] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:49:52,443] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,443] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:49:52,444] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,445] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,445] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,446] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+2: [2023-02-09 22:49:52,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:49:52,447] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,449] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:49:52,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:49:52,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+3: [2023-02-09 22:49:52,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:49:52,449] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:49:52,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:49:52,450] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:49:52,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,450] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+1: [2023-02-09 22:49:52,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:49:52,452] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:49:52,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,453] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,453] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,454] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+7: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+4: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-09 22:49:52,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-09 22:49:52,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,456] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... 
+4: [2023-02-09 22:49:52,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:49:52,456] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-09 22:49:52,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:49:52,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-09 22:49:52,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:49:52,457] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:49:52,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:49:52,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... 
+4: [2023-02-09 22:49:52,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-09 22:49:52,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:49:52,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:49:52,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-09 22:49:52,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-09 22:49:52,458] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-09 22:49:52,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:49:52,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+2: [2023-02-09 22:49:52,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-09 22:49:52,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-09 22:49:52,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,459] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,459] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-09 22:49:52,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,460] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... 
+0: [2023-02-09 22:49:52,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,461] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,462] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,464] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,465] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,466] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:49:52,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:49:52,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:49:52,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:49:52,474] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:49:52,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:49:52,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-09 22:49:52,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_04-model_00-model_states.pt. 
+4: [2023-02-09 22:49:52,480] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. 
+0: [2023-02-09 22:49:52,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,620] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... 
+0: [2023-02-09 22:49:52,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... 
+1: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. 
+7: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... 
+3: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. 
+3: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. 
+3: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-09 22:49:52,625] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. 
+7: [2023-02-09 22:49:52,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:49:52,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:49:52,628] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,629] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. 
+5: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:49:52,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:49:52,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:49:52,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-09 22:49:52,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:49:52,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-09 22:49:52,631] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:49:52,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:49:52,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. 
+2: [2023-02-09 22:49:52,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:49:52,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-09 22:49:52,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:49:52,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. 
+1: [2023-02-09 22:49:52,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,634] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:49:52,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. 
+5: [2023-02-09 22:49:52,636] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-09 22:49:52,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:49:52,637] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. 
+1: [2023-02-09 22:49:52,639] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,639] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:49:52,640] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:49:52,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,640] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+1: [2023-02-09 22:49:52,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:49:52,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,641] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:49:52,642] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,642] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-09 22:49:52,642] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:49:52,642] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:49:52,642] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+7: [2023-02-09 22:49:52,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,644] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:49:52,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:49:52,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+1: [2023-02-09 22:49:52,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-09 22:49:52,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,647] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... 
+4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,648] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-09 22:49:52,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. 
+5: [2023-02-09 22:49:52,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:49:52,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,650] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-09 22:49:52,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-09 22:49:52,651] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+3: [2023-02-09 22:49:52,651] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:49:52,651] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,651] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-09 22:49:52,651] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:49:52,653] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,654] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+5: [2023-02-09 22:49:52,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,656] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,657] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,658] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. 
+4: [2023-02-09 22:49:52,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-09 22:49:52,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,794] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,795] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,796] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,796] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,797] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+6: [2023-02-09 22:49:52,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:49:52,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,798] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:49:52,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. 
+4: [2023-02-09 22:49:52,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:49:52,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:49:52,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,799] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,800] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+7: [2023-02-09 22:49:52,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. 
+4: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-09 22:49:52,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. 
+6: [2023-02-09 22:49:52,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-09 22:49:52,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:49:52,802] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,802] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+1: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. 
+0: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,803] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:49:52,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. 
+3: [2023-02-09 22:49:52,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,804] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:49:52,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:49:52,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-09 22:49:52,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-09 22:49:52,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-09 22:49:52,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+3: [2023-02-09 22:49:52,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-09 22:49:52,805] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,806] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-09 22:49:52,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... 
+6: [2023-02-09 22:49:52,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-09 22:49:52,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,808] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,809] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. 
+5: [2023-02-09 22:49:52,810] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:49:52,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:49:52,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:49:52,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:49:52,812] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. 
+0: [2023-02-09 22:49:52,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:49:52,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:49:52,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:49:52,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. 
+5: [2023-02-09 22:49:52,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:49:52,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. 
+7: [2023-02-09 22:49:52,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,816] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,817] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. 
+4: [2023-02-09 22:49:52,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,817] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,818] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:49:52,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... 
+6: [2023-02-09 22:49:52,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:49:52,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:49:52,820] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-09 22:49:52,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:49:52,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... 
+5: [2023-02-09 22:49:52,821] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,822] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... 
+7: [2023-02-09 22:49:52,822] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:49:52,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,823] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,824] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-09 22:49:52,824] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-09 22:49:52,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,829] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-09 22:49:52,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-09 22:49:52,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... 
+7: [2023-02-09 22:49:52,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-09 22:49:52,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-09 22:49:52,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-09 22:49:52,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,837] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... 
+3: [2023-02-09 22:49:52,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,838] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,838] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... 
+6: [2023-02-09 22:49:52,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. 
+1: [2023-02-09 22:49:52,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. 
+1: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-09 22:49:52,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. 
+7: [2023-02-09 22:49:52,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. 
+0: [2023-02-09 22:49:52,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-09 22:49:52,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:49:52,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:52,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... 
+2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. 
+5: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. 
+3: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... 
+4: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:52,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-09 22:49:52,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:52,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-09 22:49:52,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:52,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... 
+5: [2023-02-09 22:49:52,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:52,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-09 22:49:52,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:52,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-09 22:49:52,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-09 22:49:52,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-09 22:49:52,982] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt... 
+1: [2023-02-09 22:49:52,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. 
+6: [2023-02-09 22:49:52,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:52,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:52,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:52,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:52,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. 
+1: [2023-02-09 22:49:52,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:52,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:52,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:52,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:52,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+7: [2023-02-09 22:49:52,991] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:52,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:52,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:52,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-09 22:49:52,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:52,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+4: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. 
+0: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:52,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:52,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:52,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:52,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:49:52,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:52,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. 
+5: [2023-02-09 22:49:52,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:52,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:52,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:52,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:52,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:52,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:52,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:52,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:52,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+5: [2023-02-09 22:49:52,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:52,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:52,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:52,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:52,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:52,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:52,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:52,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-09 22:49:52,999] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-09 22:49:53,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:53,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+7: [2023-02-09 22:49:53,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-09 22:49:53,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:53,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:53,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:53,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:53,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:53,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:53,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-09 22:49:53,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:53,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:53,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+5: [2023-02-09 22:49:53,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:53,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:53,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:53,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:53,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-09 22:49:53,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:53,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:53,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:53,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:53,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:53,002] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+3: [2023-02-09 22:49:53,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:53,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:53,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:53,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:53,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-09 22:49:53,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:53,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:53,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:53,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:53,006] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:53,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+3: [2023-02-09 22:49:53,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:53,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:53,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:53,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:53,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:53,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. 
+6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-09 22:49:53,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-09 22:49:53,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:53,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:53,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. 
+5: [2023-02-09 22:49:53,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:53,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:53,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:53,166] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-09 22:49:53,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:53,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+2: [2023-02-09 22:49:53,167] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:53,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:53,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:53,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:53,168] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:53,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,169] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+4: [2023-02-09 22:49:53,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:53,169] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. 
+4: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:53,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+2: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,170] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:53,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:53,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+2: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+7: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:53,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. 
+1: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+0: [2023-02-09 22:49:53,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:53,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:53,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:53,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:49:53,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:49:53,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:49:53,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... 
+3: [2023-02-09 22:49:53,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:49:53,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-09 22:49:53,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:53,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-09 22:49:53,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-09 22:49:53,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-09 22:49:53,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. 
+4: [2023-02-09 22:49:53,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. 
+0: [2023-02-09 22:49:53,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:49:53,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:49:53,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,182] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:49:53,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. 
+1: [2023-02-09 22:49:53,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,185] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,186] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. 
+5: [2023-02-09 22:49:53,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:49:53,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:49:53,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-09 22:49:53,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... 
+7: [2023-02-09 22:49:53,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:49:53,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,188] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:49:53,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:49:53,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:49:53,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... 
+4: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... 
+2: [2023-02-09 22:49:53,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:49:53,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:49:53,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:49:53,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... 
+1: [2023-02-09 22:49:53,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:49:53,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-09 22:49:53,194] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,194] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,194] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,194] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:49:53,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... 
+0: [2023-02-09 22:49:53,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:49:53,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:49:53,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-09 22:49:53,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. 
+7: [2023-02-09 22:49:53,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-09 22:49:53,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-09 22:49:53,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:49:53,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:49:53,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,208] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,307] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,307] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,308] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,308] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,309] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. 
+6: [2023-02-09 22:49:53,310] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,310] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,311] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,318] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... 
+3: [2023-02-09 22:49:53,318] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,319] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,320] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,320] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,321] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. 
+5: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. 
+7: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... 
+7: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:49:53,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:49:53,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:49:53,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-09 22:49:53,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+4: [2023-02-09 22:49:53,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:49:53,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:49:53,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. 
+1: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. 
+0: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:49:53,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-09 22:49:53,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... 
+2: [2023-02-09 22:49:53,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:49:53,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:49:53,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-09 22:49:53,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. 
+2: [2023-02-09 22:49:53,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:49:53,332] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-09 22:49:53,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:49:53,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:49:53,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-09 22:49:53,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. 
+2: [2023-02-09 22:49:53,333] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-09 22:49:53,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,334] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:49:53,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-09 22:49:53,336] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:49:53,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,337] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+3: [2023-02-09 22:49:53,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:49:53,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,339] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+4: [2023-02-09 22:49:53,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,340] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:49:53,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,340] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+0: [2023-02-09 22:49:53,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,341] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,342] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-09 22:49:53,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. 
+1: [2023-02-09 22:49:53,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+5: [2023-02-09 22:49:53,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-09 22:49:53,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:49:53,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:49:53,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:49:53,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. 
+0: [2023-02-09 22:49:53,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-09 22:49:53,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. 
+2: [2023-02-09 22:49:53,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-09 22:49:53,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:49:53,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:49:53,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,351] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. 
+5: [2023-02-09 22:49:53,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:49:53,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+0: [2023-02-09 22:49:53,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-09 22:49:53,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:49:53,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-09 22:49:53,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-09 22:49:53,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:49:53,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+0: [2023-02-09 22:49:53,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,481] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+5: [2023-02-09 22:49:53,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,482] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,482] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:49:53,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:49:53,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. 
+3: [2023-02-09 22:49:53,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:49:53,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:49:53,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,483] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,483] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:49:53,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:49:53,484] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:49:53,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. 
+7: [2023-02-09 22:49:53,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,485] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,485] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+7: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,486] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:49:53,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,487] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,487] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. 
+1: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. 
+1: [2023-02-09 22:49:53,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,488] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:49:53,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. 
+0: [2023-02-09 22:49:53,489] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,489] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+0: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+1: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-09 22:49:53,490] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-09 22:49:53,491] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,491] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... 
+2: [2023-02-09 22:49:53,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,492] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,492] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-09 22:49:53,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:49:53,493] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-09 22:49:53,493] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,494] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. 
+7: [2023-02-09 22:49:53,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,495] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-09 22:49:53,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:49:53,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,496] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,496] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. 
+1: [2023-02-09 22:49:53,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:49:53,498] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. 
+7: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,499] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:49:53,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,500] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,500] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,501] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-09 22:49:53,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... 
+3: [2023-02-09 22:49:53,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:49:53,501] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. 
+5: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,502] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-09 22:49:53,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,503] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. 
+6: [2023-02-09 22:49:53,503] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-09 22:49:53,504] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,505] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... 
+0: [2023-02-09 22:49:53,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,506] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,506] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,507] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. 
+4: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... 
+7: [2023-02-09 22:49:53,508] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:49:53,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,509] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:49:53,509] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... 
+5: [2023-02-09 22:49:53,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,510] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:49:53,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,511] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-09 22:49:53,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,512] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,513] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,514] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. 
+2: [2023-02-09 22:49:53,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,515] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,516] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,517] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... 
+0: [2023-02-09 22:49:53,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-09 22:49:53,522] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-09 22:49:53,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,660] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,660] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,661] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,661] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. 
+1: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. 
+7: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:49:53,663] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... 
+5: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. 
+5: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:49:53,664] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. 
+2: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. 
+3: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... 
+4: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-09 22:49:53,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-09 22:49:53,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,667] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-09 22:49:53,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. 
+4: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-09 22:49:53,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... 
+7: [2023-02-09 22:49:53,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,669] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-09 22:49:53,670] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,672] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,673] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. 
+6: [2023-02-09 22:49:53,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,674] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,675] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,676] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:49:53,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:49:53,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:49:53,676] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:49:53,677] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. 
+3: [2023-02-09 22:49:53,678] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,678] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:49:53,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,679] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:49:53,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. 
+2: [2023-02-09 22:49:53,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:49:53,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-09 22:49:53,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,680] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. 
+6: [2023-02-09 22:49:53,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:49:53,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,681] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:49:53,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:49:53,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,682] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-09 22:49:53,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:49:53,682] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,683] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-09 22:49:53,683] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... 
+7: [2023-02-09 22:49:53,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,684] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,684] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,685] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:49:53,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,685] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,686] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. 
+1: [2023-02-09 22:49:53,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,686] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:49:53,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... 
+2: [2023-02-09 22:49:53,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-09 22:49:53,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,687] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-09 22:49:53,688] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:49:53,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:49:53,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:49:53,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... 
+4: [2023-02-09 22:49:53,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,689] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,690] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,691] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-09 22:49:53,691] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,692] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... 
+2: [2023-02-09 22:49:53,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,693] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,694] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,695] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-09 22:49:53,696] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-09 22:49:53,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,697] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,700] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:49:53,701] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. 
+4: [2023-02-09 22:49:53,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. 
+2: [2023-02-09 22:49:53,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... 
+2: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... 
+1: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:49:53,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. 
+5: [2023-02-09 22:49:53,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:49:53,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:49:53,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:49:53,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. 
+1: [2023-02-09 22:49:53,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:49:53,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:49:53,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. 
+7: [2023-02-09 22:49:53,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:49:53,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:49:53,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:49:53,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:49:53,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:49:53,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:49:53,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... 
+0: [2023-02-09 22:49:53,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... 
+0: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,920] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. 
+3: [2023-02-09 22:49:53,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... 
+3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-09 22:49:53,921] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. 
+5: [2023-02-09 22:49:53,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:49:53,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:49:53,922] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-09 22:49:53,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+7: [2023-02-09 22:49:53,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:53,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:53,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:53,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:53,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. 
+5: [2023-02-09 22:49:53,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-09 22:49:53,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:53,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:49:53,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:53,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. 
+4: [2023-02-09 22:49:53,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:53,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:53,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:53,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:49:53,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:53,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:49:53,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:53,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+6: [2023-02-09 22:49:53,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:53,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:49:53,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:53,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:49:53,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. 
+3: [2023-02-09 22:49:53,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:49:53,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:49:53,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:53,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:49:53,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:49:53,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:49:53,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:49:53,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:53,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. 
+7: [2023-02-09 22:49:53,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:53,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:53,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:53,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-09 22:49:53,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:53,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:53,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:53,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:49:53,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. 
+0: [2023-02-09 22:49:53,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-09 22:49:53,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-09 22:49:53,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:53,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:53,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:53,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:53,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-09 22:49:53,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:53,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:53,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+1: [2023-02-09 22:49:53,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:53,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:53,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:53,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:53,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:53,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:53,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:53,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:53,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:53,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-09 22:49:53,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+2: [2023-02-09 22:49:53,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:53,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:53,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:53,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:53,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:53,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:53,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:53,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:53,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:53,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+3: [2023-02-09 22:49:53,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:53,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:53,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:53,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:53,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-09 22:49:53,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:53,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:53,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:53,948] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-09 22:49:53,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+7: [2023-02-09 22:49:53,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:53,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:54,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:54,111] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,111] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:54,112] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,112] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:54,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. 
+1: [2023-02-09 22:49:54,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:54,113] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:54,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:54,113] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:54,114] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:54,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:54,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. 
+2: [2023-02-09 22:49:54,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:54,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:54,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:54,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:54,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-09 22:49:54,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+2: [2023-02-09 22:49:54,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:54,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:54,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:49:54,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:54,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+0: [2023-02-09 22:49:54,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+0: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:54,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+0: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. 
+7: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:54,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:49:54,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:54,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. 
+6: [2023-02-09 22:49:54,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:49:54,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:54,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+6: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. 
+3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-09 22:49:54,128] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-09 22:49:54,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:54,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:49:54,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-09 22:49:54,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. 
+7: [2023-02-09 22:49:54,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-09 22:49:54,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:49:54,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:49:54,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. 
+0: [2023-02-09 22:49:54,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:49:54,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:49:54,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:49:54,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-09 22:49:54,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... 
+4: [2023-02-09 22:49:54,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:49:54,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. 
+6: [2023-02-09 22:49:54,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:49:54,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:49:54,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. 
+6: [2023-02-09 22:49:54,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:49:54,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:49:54,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. 
+3: [2023-02-09 22:49:54,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-09 22:49:54,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:49:54,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:49:54,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. 
+4: [2023-02-09 22:49:54,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-09 22:49:54,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:49:54,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:49:54,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. 
+6: [2023-02-09 22:49:54,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-09 22:49:54,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-09 22:49:54,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... 
+4: [2023-02-09 22:49:54,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-09 22:49:54,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-09 22:49:54,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... 
+4: [2023-02-09 22:49:54,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,162] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:54,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+5: [2023-02-09 22:49:54,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:54,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:54,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:54,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,163] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:54,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-09 22:49:54,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt... 
+5: [2023-02-09 22:49:54,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,184] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,187] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... 
+5: [2023-02-09 22:49:54,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-09 22:49:54,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+2: [2023-02-09 22:49:54,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... 
+5: [2023-02-09 22:49:54,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+2: [2023-02-09 22:49:54,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:49:54,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,383] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-09 22:49:54,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+0: [2023-02-09 22:49:54,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-09 22:49:54,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:49:54,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:49:54,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+1: [2023-02-09 22:49:54,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:49:54,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+1: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... 
+3: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:49:54,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:49:54,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+1: [2023-02-09 22:49:54,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+1: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... 
+3: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... 
+4: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+6: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... 
+6: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-09 22:49:54,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-09 22:49:54,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:49:54,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+7: [2023-02-09 22:49:54,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-09 22:49:54,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:49:54,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:49:54,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:49:54,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+4: [2023-02-09 22:49:54,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:49:54,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:49:54,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+4: [2023-02-09 22:49:54,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:49:54,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:49:54,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... 
+6: [2023-02-09 22:49:54,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:49:54,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-09 22:49:54,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... 
+1: [2023-02-09 22:49:54,407] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-09 22:49:54,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:49:54,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+3: [2023-02-09 22:49:54,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-09 22:49:54,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... 
+6: [2023-02-09 22:49:54,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:49:54,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. 
+7: [2023-02-09 22:49:54,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:49:54,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-09 22:49:54,416] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-09 22:49:54,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... 
+1: [2023-02-09 22:49:54,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-09 22:49:54,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. 
+2: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. 
+0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. 
+0: [2023-02-09 22:49:54,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... 
+4: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,588] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. 
+6: [2023-02-09 22:49:54,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... 
+7: [2023-02-09 22:49:54,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... 
+7: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-09 22:49:54,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. 
+7: [2023-02-09 22:49:54,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,599] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:49:54,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:49:54,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. 
+1: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. 
+3: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:49:54,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. 
+5: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. 
+1: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:49:54,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-09 22:49:54,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:49:54,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-09 22:49:54,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... 
+2: [2023-02-09 22:49:54,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-09 22:49:54,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. 
+0: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt... 
+2: [2023-02-09 22:49:54,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:49:54,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... 
+7: [2023-02-09 22:49:54,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-09 22:49:54,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:49:54,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:49:54,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:49:54,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... 
+6: [2023-02-09 22:49:54,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,610] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:49:54,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:49:54,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... 
+4: [2023-02-09 22:49:54,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:49:54,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-09 22:49:54,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:49:54,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:49:54,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-09 22:49:54,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... 
+3: [2023-02-09 22:49:54,612] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:49:54,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:49:54,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. 
+1: [2023-02-09 22:49:54,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-09 22:49:54,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:49:54,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... 
+1: [2023-02-09 22:49:54,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:49:54,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-09 22:49:54,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-09 22:49:54,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,620] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,620] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... 
+1: [2023-02-09 22:49:54,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:49:54,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,622] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,623] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-09 22:49:54,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,626] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. 
+5: [2023-02-09 22:49:54,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-09 22:49:54,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,629] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... 
+0: [2023-02-09 22:49:54,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,755] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,756] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. 
+0: [2023-02-09 22:49:54,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,757] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. 
+1: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,758] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,759] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. 
+1: [2023-02-09 22:49:54,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... 
+3: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. 
+5: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... 
+5: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... 
+7: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-09 22:49:54,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-09 22:49:54,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:49:54,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. 
+4: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... 
+2: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:49:54,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:49:54,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:49:54,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. 
+7: [2023-02-09 22:49:54,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:49:54,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:49:54,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:49:54,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-09 22:49:54,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... 
+0: [2023-02-09 22:49:54,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:49:54,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:49:54,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-09 22:49:54,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:49:54,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-09 22:49:54,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:49:54,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. 
+3: [2023-02-09 22:49:54,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,771] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. 
+4: [2023-02-09 22:49:54,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,773] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:49:54,773] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... 
+6: [2023-02-09 22:49:54,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. 
+6: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... 
+4: [2023-02-09 22:49:54,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:49:54,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:49:54,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:49:54,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. 
+0: [2023-02-09 22:49:54,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:49:54,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-09 22:49:54,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:49:54,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... 
+2: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. 
+1: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... 
+2: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-09 22:49:54,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:49:54,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... 
+6: [2023-02-09 22:49:54,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:49:54,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-09 22:49:54,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,787] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-09 22:49:54,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:49:54,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... 
+5: [2023-02-09 22:49:54,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-09 22:49:54,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:49:54,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,793] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... 
+7: [2023-02-09 22:49:54,793] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-09 22:49:54,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,799] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... 
+5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,909] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,910] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... 
+0: [2023-02-09 22:49:54,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:49:54,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:49:54,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. 
+0: [2023-02-09 22:49:54,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:49:54,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:49:54,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. 
+7: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. 
+4: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... 
+3: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... 
+0: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:49:54,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. 
+2: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. 
+4: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-09 22:49:54,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. 
+6: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. 
+7: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:49:54,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:49:54,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-09 22:49:54,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,918] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-09 22:49:54,919] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt... 
+0: [2023-02-09 22:49:54,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. 
+5: [2023-02-09 22:49:54,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:54,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:49:54,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. 
+2: [2023-02-09 22:49:54,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:54,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:54,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. 
+7: [2023-02-09 22:49:54,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:54,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:54,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-09 22:49:54,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:54,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:54,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:54,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+4: [2023-02-09 22:49:54,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-09 22:49:54,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:54,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+0: [2023-02-09 22:49:54,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:54,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:49:54,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:49:54,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:54,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:54,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+5: [2023-02-09 22:49:54,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:54,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-09 22:49:54,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-09 22:49:54,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-09 22:49:54,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:54,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-09 22:49:54,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-09 22:49:54,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:54,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+3: [2023-02-09 22:49:54,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:54,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-09 22:49:54,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:54,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-09 22:49:54,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-09 22:49:54,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-09 22:49:54,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:54,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+4: [2023-02-09 22:49:54,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-09 22:49:54,936] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-09 22:49:54,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-09 22:49:54,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:54,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:54,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:54,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:54,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:54,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:54,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:54,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+6: [2023-02-09 22:49:54,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:54,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-09 22:49:54,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:54,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-09 22:49:54,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-09 22:49:54,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:54,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:54,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:54,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-09 22:49:54,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:54,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+6: [2023-02-09 22:49:54,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:54,939] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-09 22:49:54,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:54,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-09 22:49:54,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-09 22:49:54,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:54,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-09 22:49:54,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-09 22:49:54,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:54,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+2: [2023-02-09 22:49:54,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-09 22:49:54,945] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-09 22:49:54,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:54,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-09 22:49:54,951] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:54,952] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:55,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,116] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+6: [2023-02-09 22:49:55,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:55,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:55,116] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:55,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,117] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,117] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:55,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,118] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:55,118] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:55,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+4: [2023-02-09 22:49:55,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-09 22:49:55,119] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-09 22:49:55,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-09 22:49:55,119] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-09 22:49:55,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-09 22:49:55,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-09 22:49:55,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:55,120] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,120] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+4: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+2: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. 
+4: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+1: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:55,121] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+2: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+0: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,122] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. 
+2: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+1: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-09 22:49:55,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-09 22:49:55,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:55,123] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:55,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:55,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-09 22:49:55,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... 
+5: [2023-02-09 22:49:55,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:55,124] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,124] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:55,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:55,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-09 22:49:55,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-09 22:49:55,125] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-09 22:49:55,125] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,126] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. 
+3: [2023-02-09 22:49:55,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:55,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,127] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-09 22:49:55,127] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,128] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-09 22:49:55,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. 
+4: [2023-02-09 22:49:55,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-09 22:49:55,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-09 22:49:55,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-09 22:49:55,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. 
+4: [2023-02-09 22:49:55,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-09 22:49:55,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. 
+4: [2023-02-09 22:49:55,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-09 22:49:55,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-09 22:49:55,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-09 22:49:55,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,136] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... 
+6: [2023-02-09 22:49:55,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:49:55,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... 
+3: [2023-02-09 22:49:55,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-09 22:49:55,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. 
+7: [2023-02-09 22:49:55,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-09 22:49:55,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-09 22:49:55,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. 
+1: [2023-02-09 22:49:55,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-09 22:49:55,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,142] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... 
+5: [2023-02-09 22:49:55,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,143] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-09 22:49:55,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-09 22:49:55,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,144] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... 
+6: [2023-02-09 22:49:55,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:49:55,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... 
+7: [2023-02-09 22:49:55,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-09 22:49:55,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... 
+5: [2023-02-09 22:49:55,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,150] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-09 22:49:55,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,152] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-09 22:49:55,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+7: [2023-02-09 22:49:55,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+7: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... 
+4: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,344] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:49:55,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:49:55,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+6: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+5: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+5: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... 
+1: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... 
+3: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... 
+3: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-09 22:49:55,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:49:55,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:49:55,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:49:55,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... 
+0: [2023-02-09 22:49:55,349] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:49:55,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,349] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+2: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-09 22:49:55,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... 
+0: [2023-02-09 22:49:55,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:49:55,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:49:55,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:49:55,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-09 22:49:55,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-09 22:49:55,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:49:55,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+6: [2023-02-09 22:49:55,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+6: [2023-02-09 22:49:55,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:49:55,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+0: [2023-02-09 22:49:55,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:49:55,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,360] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:49:55,361] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+5: [2023-02-09 22:49:55,361] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... 
+4: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,362] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-09 22:49:55,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-09 22:49:55,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-09 22:49:55,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... 
+4: [2023-02-09 22:49:55,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-09 22:49:55,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-09 22:49:55,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-09 22:49:55,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-09 22:49:55,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:49:55,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-09 22:49:55,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... 
+0: [2023-02-09 22:49:55,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-09 22:49:55,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,366] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-09 22:49:55,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-09 22:49:55,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... 
+0: [2023-02-09 22:49:55,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-09 22:49:55,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,367] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-09 22:49:55,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:49:55,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:49:55,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+5: [2023-02-09 22:49:55,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-09 22:49:55,368] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-09 22:49:55,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... 
+7: [2023-02-09 22:49:55,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-09 22:49:55,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+0: [2023-02-09 22:49:55,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-09 22:49:55,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_19-model_00-model_states.pt. 
+7: [2023-02-09 22:49:55,382] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,519] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,519] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... 
+5: [2023-02-09 22:49:55,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,520] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,520] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,521] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,521] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,523] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,523] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... 
+0: [2023-02-09 22:49:55,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,524] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,524] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-09 22:49:55,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... 
+0: [2023-02-09 22:49:55,525] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,525] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-09 22:49:55,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-09 22:49:55,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,526] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,526] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. 
+3: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... 
+4: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-09 22:49:55,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. 
+3: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. 
+1: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-09 22:49:55,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-09 22:49:55,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. 
+4: [2023-02-09 22:49:55,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-09 22:49:55,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-09 22:49:55,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-09 22:49:55,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-09 22:49:55,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. 
+6: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... 
+6: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-09 22:49:55,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. 
+2: [2023-02-09 22:49:55,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-09 22:49:55,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-09 22:49:55,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-09 22:49:55,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-09 22:49:55,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. 
+0: [2023-02-09 22:49:55,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-09 22:49:55,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-09 22:49:55,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-09 22:49:55,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,536] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-09 22:49:55,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+4: [2023-02-09 22:49:55,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-09 22:49:55,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-09 22:49:55,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-09 22:49:55,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,539] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-09 22:49:55,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,540] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-09 22:49:55,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. 
+3: [2023-02-09 22:49:55,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-09 22:49:55,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-09 22:49:55,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-09 22:49:55,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. 
+4: [2023-02-09 22:49:55,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-09 22:49:55,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. 
+3: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-09 22:49:55,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. 
+7: [2023-02-09 22:49:55,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-09 22:49:55,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+1: [2023-02-09 22:49:55,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-09 22:49:55,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-09 22:49:55,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-09 22:49:55,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-09 22:49:55,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+0: [2023-02-09 22:49:55,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-09 22:49:55,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,548] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-09 22:49:55,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+6: [2023-02-09 22:49:55,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-09 22:49:55,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-09 22:49:55,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-09 22:49:55,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+6: [2023-02-09 22:49:55,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-09 22:49:55,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+0: [2023-02-09 22:49:55,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. 
+1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. 
+6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. 
+6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+2: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. 
+3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-09 22:49:55,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. 
+4: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+0: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. 
+4: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. 
+3: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. 
+4: [2023-02-09 22:49:55,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. 
+2: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-09 22:49:55,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-09 22:49:55,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. 
+1: [2023-02-09 22:49:55,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-09 22:49:55,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-09 22:49:55,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-09 22:49:55,559] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-09 22:49:55,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-09 22:49:55,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +0: > overriding learning rate value to 0.0002 +0: > overriding minimum learning rate value to 2e-05 +0: > overriding warmup iterations value to 0 +0: > overriding total number of iterations value to 1 +0: > overriding decay style value to cosine +6: [2023-02-09 22:49:55,560] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-09 22:49:55,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+2: [2023-02-09 22:49:55,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-09 22:49:55,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-09 22:49:55,561] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-09 22:49:55,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... 
+7: [2023-02-09 22:49:55,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-09 22:49:55,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-09 22:49:55,563] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-09 22:49:55,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:49:55,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:49:55,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:49:55,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:49:55,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:49:55,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-02-09 22:49:55,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... 
+5: [2023-02-09 22:49:55,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +6: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... 
+0: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +0: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... 
+1: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +1: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... 
+2: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:49:55,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:49:55,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:49:55,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:49:55,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:49:55,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:49:55,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:49:55,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... +4: [2023-02-09 22:49:55,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... 
+4: [2023-02-09 22:49:55,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:49:55,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... 
+7: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-02-09 22:49:55,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... +2: [2023-02-09 22:49:55,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:49:55,842] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 19 +6: [2023-02-09 22:49:55,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. 
+6: [2023-02-09 22:49:55,843] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 51 +2: [2023-02-09 22:49:55,844] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 19 +6: [2023-02-09 22:49:55,846] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 51 +4: [2023-02-09 22:49:55,851] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:49:55,851] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 37 +4: [2023-02-09 22:49:55,854] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 37 +6: [2023-02-09 22:49:55,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:49:55,855] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 53 +6: [2023-02-09 22:49:55,858] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 53 +0: [2023-02-09 22:49:55,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:49:55,859] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 6 +0: [2023-02-09 22:49:55,862] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 6 +3: [2023-02-09 22:49:55,873] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. 
+3: [2023-02-09 22:49:55,873] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 29 +3: [2023-02-09 22:49:55,876] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 29 +1: [2023-02-09 22:49:55,881] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:49:55,881] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 9 +1: [2023-02-09 22:49:55,884] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 9 +5: [2023-02-09 22:49:55,890] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:49:55,890] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 46 +5: [2023-02-09 22:49:55,893] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 46 +1: [2023-02-09 22:49:55,897] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:49:55,897] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 8 +6: [2023-02-09 22:49:55,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. 
+6: [2023-02-09 22:49:55,898] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 49 +1: [2023-02-09 22:49:55,900] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 8 +6: [2023-02-09 22:49:55,901] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 49 +6: [2023-02-09 22:49:55,903] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:49:55,903] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 50 +6: [2023-02-09 22:49:55,906] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 50 +5: [2023-02-09 22:49:55,909] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:49:55,909] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 42 +2: [2023-02-09 22:49:55,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:49:55,911] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 23 +5: [2023-02-09 22:49:55,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. 
+5: [2023-02-09 22:49:55,911] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 41 +5: [2023-02-09 22:49:55,912] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 42 +7: [2023-02-09 22:49:55,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:49:55,913] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 56 +2: [2023-02-09 22:49:55,914] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 23 +5: [2023-02-09 22:49:55,914] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 41 +7: [2023-02-09 22:49:55,916] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 56 +2: [2023-02-09 22:49:55,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:49:55,916] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 20 +5: [2023-02-09 22:49:55,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:49:55,918] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 43 +3: [2023-02-09 22:49:55,919] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. 
+3: [2023-02-09 22:49:55,919] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 24 +2: [2023-02-09 22:49:55,919] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 20 +5: [2023-02-09 22:49:55,921] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 43 +4: [2023-02-09 22:49:55,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:49:55,920] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 39 +3: [2023-02-09 22:49:55,922] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 24 +7: [2023-02-09 22:49:55,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:49:55,923] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 60 +4: [2023-02-09 22:49:55,923] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 39 +7: [2023-02-09 22:49:55,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:49:55,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. 
+7: [2023-02-09 22:49:55,925] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 61 +7: [2023-02-09 22:49:55,926] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 60 +3: [2023-02-09 22:49:55,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:49:55,926] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 45 +3: [2023-02-09 22:49:55,925] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 30 +3: [2023-02-09 22:49:55,928] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 30 +7: [2023-02-09 22:49:55,928] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 61 +5: [2023-02-09 22:49:55,929] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 45 +3: [2023-02-09 22:49:55,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:49:55,930] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 28 +6: [2023-02-09 22:49:55,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. 
+3: [2023-02-09 22:49:55,932] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 28 +6: [2023-02-09 22:49:55,932] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 48 +6: [2023-02-09 22:49:55,935] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 48 +1: [2023-02-09 22:49:55,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:49:55,945] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 12 +5: [2023-02-09 22:49:55,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:49:55,947] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 40 +1: [2023-02-09 22:49:55,948] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 12 +3: [2023-02-09 22:49:55,950] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:49:55,950] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 40 +3: [2023-02-09 22:49:55,950] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 25 +1: [2023-02-09 22:49:55,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. 
+1: [2023-02-09 22:49:55,953] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 14 +5: [2023-02-09 22:49:55,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-02-09 22:49:55,953] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 44 +3: [2023-02-09 22:49:55,953] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 25 +5: [2023-02-09 22:49:55,956] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 44 +1: [2023-02-09 22:49:55,956] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 14 +7: [2023-02-09 22:49:55,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:49:55,956] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 63 +6: [2023-02-09 22:49:55,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:49:55,957] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 52 +0: [2023-02-09 22:49:55,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. 
+0: [2023-02-09 22:49:55,958] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 2 +7: [2023-02-09 22:49:55,959] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 63 +6: [2023-02-09 22:49:55,960] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 52 +0: [2023-02-09 22:49:55,960] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 2 +2: [2023-02-09 22:49:55,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:49:55,962] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 18 +4: [2023-02-09 22:49:55,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:49:55,962] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 36 +0: [2023-02-09 22:49:55,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:49:55,965] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 7 +2: [2023-02-09 22:49:55,965] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 18 +4: [2023-02-09 22:49:55,966] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 36 +7: [2023-02-09 22:49:55,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. 
+7: [2023-02-09 22:49:55,966] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 59 +5: [2023-02-09 22:49:55,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:49:55,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:49:55,968] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 21 +5: [2023-02-09 22:49:55,967] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 47 +0: [2023-02-09 22:49:55,968] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 7 +4: [2023-02-09 22:49:55,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:49:55,969] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 59 +4: [2023-02-09 22:49:55,969] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 38 +2: [2023-02-09 22:49:55,970] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 21 +5: [2023-02-09 22:49:55,971] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 47 +4: [2023-02-09 22:49:55,972] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 38 +7: [2023-02-09 22:49:55,975] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. 
+7: [2023-02-09 22:49:55,975] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 62 +4: [2023-02-09 22:49:55,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:49:55,978] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 33 +7: [2023-02-09 22:49:55,979] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 62 +4: [2023-02-09 22:49:55,981] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 33 +1: [2023-02-09 22:49:55,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:49:55,983] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 15 +3: [2023-02-09 22:49:55,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:49:55,986] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 15 +3: [2023-02-09 22:49:55,986] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 26 +0: [2023-02-09 22:49:55,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. 
+0: [2023-02-09 22:49:55,988] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 5 +3: [2023-02-09 22:49:55,990] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 26 +0: [2023-02-09 22:49:55,991] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 5 +7: [2023-02-09 22:49:55,991] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-02-09 22:49:55,991] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 58 +0: [2023-02-09 22:49:55,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:49:55,993] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 1 +1: [2023-02-09 22:49:55,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:49:55,995] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 10 +7: [2023-02-09 22:49:55,995] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 58 +0: [2023-02-09 22:49:55,996] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 1 +4: [2023-02-09 22:49:55,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. 
+4: [2023-02-09 22:49:55,997] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 34 +1: [2023-02-09 22:49:55,998] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 10 +2: [2023-02-09 22:49:56,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:49:56,001] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 22 +1: [2023-02-09 22:49:56,002] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:49:56,000] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 34 +1: [2023-02-09 22:49:56,002] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 11 +4: [2023-02-09 22:49:56,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:49:56,003] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 35 +2: [2023-02-09 22:49:56,004] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 22 +1: [2023-02-09 22:49:56,005] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 11 +0: [2023-02-09 22:49:56,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. 
+0: [2023-02-09 22:49:56,006] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 4 +4: [2023-02-09 22:49:56,006] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 35 +3: [2023-02-09 22:49:56,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. +3: [2023-02-09 22:49:56,008] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 27 +4: [2023-02-09 22:49:56,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-02-09 22:49:56,009] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 32 +0: [2023-02-09 22:49:56,009] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 4 +3: [2023-02-09 22:49:56,011] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 27 +4: [2023-02-09 22:49:56,012] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 32 +0: [2023-02-09 22:49:56,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:49:56,019] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 3 +7: [2023-02-09 22:49:56,019] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. 
+7: [2023-02-09 22:49:56,019] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 57 +0: [2023-02-09 22:49:56,021] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 3 +7: [2023-02-09 22:49:56,023] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 57 +6: [2023-02-09 22:49:56,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:49:56,030] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 55 +6: [2023-02-09 22:49:56,033] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 55 +1: [2023-02-09 22:49:56,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. +1: [2023-02-09 22:49:56,042] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 13 +1: [2023-02-09 22:49:56,045] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 13 +0: [2023-02-09 22:49:56,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. +0: [2023-02-09 22:49:56,074] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 0 +0: [2023-02-09 22:49:56,077] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 0 +0: could not find arguments in the checkpoint ... 
+0: checkpoint version 3.0 +2: [2023-02-09 22:49:56,088] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:49:56,088] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 17 +2: [2023-02-09 22:49:56,092] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 17 +2: [2023-02-09 22:49:56,102] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-02-09 22:49:56,102] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 16 +2: [2023-02-09 22:49:56,105] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 16 +6: [2023-02-09 22:49:56,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-02-09 22:49:56,138] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 54 +6: [2023-02-09 22:49:56,141] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 54 +3: [2023-02-09 22:49:56,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b91b5/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. 
+3: [2023-02-09 22:49:56,144] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 31 +3: [2023-02-09 22:49:56,147] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 31 +0: successfully loaded checkpoint from checkpoints_421m3b91b5 at iteration 0 +7: time (ms) | load-checkpoint: 4439.67 +0: estimated model parameters: 0.42120704 +0: estimated model parameters without embeddings: 0.35419648 +0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-02-09 22:49:56 +0: > building train, validation, and test datasets ... +0: > datasets target sizes (minimum size): +0: train: 1 +0: validation: 25600 +0: test: 25600 +0: > building train, validation, and test datasets for GPT ... +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.037963 seconds +0: number of documents: 8148327 +0: > dataset split: +0: train: +0: document indices in [0, 8148327) total of 8148327 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_3B9_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_3B9_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_3B9_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.099 seconds +0: total number of samples: 1903063 +0: total number of epochs: 1 +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... 
+0: > finished creating indexed dataset in 0.035003 seconds +0: number of documents: 364608 +0: > dataset split: +0: validation: +0: document indices in [0, 364608) total of 364608 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.012 seconds +0: total number of samples: 84978 +0: total number of epochs: 1 +0: > finished creating GPT datasets ... +0: [after dataloaders are built] datetime: 2023-02-09 22:50:10 +0: done with setup ... +0: training ... +7: time (ms) | model-and-optimizer-setup: 23163.91 | train/valid/test-data-iterators-setup: 13238.84 +0: [after training is done] datetime: 2023-02-09 22:50:10 +7: ----------------------------------------------------------------------------------------------------------------- +7: validation loss at the end of training for val data | lm loss value: 3.354960E+00 | lm loss PPL: 2.864446E+01 | +7: ----------------------------------------------------------------------------------------------------------------- +END 2820893: Thu 09 Feb 2023 10:50:39 PM EET diff --git a/421m3b91b5/sbatch_421m3b91b5.sh b/421m3b91b5/sbatch_421m3b91b5.sh new file mode 100755 index 0000000000000000000000000000000000000000..4341e4aa5b0e6c50a2b8e5983d055320f76d7ffa --- /dev/null +++ b/421m3b91b5/sbatch_421m3b91b5.sh @@ -0,0 +1,163 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH 
--exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=421m3b91b5 + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train1b5.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_1B5_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_425M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 3936562000 +# -> Samples: 1_922_149 +TRAIN_SAMPLES=1_922_149 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 19_221 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + 
--num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat <<EOF > $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/421m3b91b5/sbatch_421m3b91b5val.sh b/421m3b91b5/sbatch_421m3b91b5val.sh new file mode 100644 index 0000000000000000000000000000000000000000..69e8dda0bdbd051817905c0935210e89a683c4e7 --- /dev/null +++ 
b/421m3b91b5/sbatch_421m3b91b5val.sh @@ -0,0 +1,167 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=421m3b91b5val +VARIANT_CKPT=421m3b91b5 + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT_CKPT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train3b9.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_3B9_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_425M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 3936562000 +# -> Samples: 1_922_149 +TRAIN_SAMPLES=1 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 
\ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 0 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + --no-load-optim \ + --reset-progress \ + --override-lr-scheduler \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1 \ + --eval-iters 100 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: 
$(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/421m3b91b5/tensorboard_421m3b91b5/events.out.tfevents.1675875178.nid006678.91122.0 b/421m3b91b5/tensorboard_421m3b91b5/events.out.tfevents.1675875178.nid006678.91122.0 new file mode 100644 index 0000000000000000000000000000000000000000..e50309820dc94280624276861155f301f7fcce29 --- /dev/null +++ b/421m3b91b5/tensorboard_421m3b91b5/events.out.tfevents.1675875178.nid006678.91122.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:698481d249829cbb4b2cb9550c4fb00525d6d99f0a08a5add3699bd597b9528b +size 13359037 diff --git a/421m3b91b5/tensorboard_421m3b91b5val/events.out.tfevents.1675555841.nid005699.34029.0 b/421m3b91b5/tensorboard_421m3b91b5val/events.out.tfevents.1675555841.nid005699.34029.0 new file mode 100644 index 0000000000000000000000000000000000000000..3d743031248731e20c0e399b3db01762c32fb7de --- /dev/null +++ b/421m3b91b5/tensorboard_421m3b91b5val/events.out.tfevents.1675555841.nid005699.34029.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4118864fdc80d8aecbee35d3193573c02cb43312fb1b6972883f9e62929fa9 +size 980 diff --git a/421m3b91b5/tensorboard_421m3b91b5val/events.out.tfevents.1675975736.nid007045.54098.0 b/421m3b91b5/tensorboard_421m3b91b5val/events.out.tfevents.1675975736.nid007045.54098.0 new file mode 100644 index 0000000000000000000000000000000000000000..d58c9a17cfdfba4b62335acdb60511b0c4ccb034 --- /dev/null +++ b/421m3b91b5/tensorboard_421m3b91b5val/events.out.tfevents.1675975736.nid007045.54098.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb245975fffc175acb8c9aa02192794f01b02ea38f6c61e090de9243b8ee0890 +size 980 diff --git a/421m3b91b5/transformers/config.json b/421m3b91b5/transformers/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ea88768d351c626be3e9ca050a00f7a5eb7f522c --- /dev/null +++ 
b/421m3b91b5/transformers/config.json @@ -0,0 +1 @@ +{"vocab_size": 50304, "n_positions": 2048, "n_embd": 1280, "n_layer": 18, "n_head": 10, "n_inner": 5120, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": 
"4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": false, "model_type": "gpt2"} \ No newline at end of file diff --git a/421m3b91b5/transformers/pytorch_model.bin b/421m3b91b5/transformers/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fa67288dc4bcf7348e6d0aa885250104f77d8694 --- /dev/null +++ b/421m3b91b5/transformers/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f619e1143352b4e400c751b0edcbc92c2e31c41ef97655711863d139c16ee184 +size 993488781 diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_0.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..f7bed8d405ac3ea2ec4b08a435db28563e9ae798 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.2514375429625041, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.021998769689162215}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.05458209453689849, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0013170294944666171}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.2336973485576593, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0040096086479517205}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.08324727569094531, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016897087122561929}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.02537832167404535, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007376731654434439}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.11385934611253304, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0027982806538345414}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03937584218396654, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010694968357709916}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.053290497762677254, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012667710338326798}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.23006581827203837, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003953117095365217}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.08142923304713769, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016238266133227941}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.05232107137991547, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012444155901422537}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.22537101759593997, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0037776155749755847}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07983781819186882, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015754149459580782}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_1.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..41d87f5473626cfea7d3390b9171162176257957 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.21769111537226385, 
"fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03137697858822512}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.052001919875535305, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001321082636852792}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.23988446859803386, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.004156869077188399}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07951030843670538, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001723017279624104}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.023111231906370898, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007776060967722213}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.10329178271134923, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002685870512933402}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03540060823925364, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010747042112953737}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.050152137757026496, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012390593186375923}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.23295789943347153, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003998023116442341}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07686062327992164, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016204699207326648}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.049657111072679484, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001251376403565292}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.22830496660590702, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003848392166869495}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07589271888993057, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0016212233683597064}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_2.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..c513a9291b2aef6f1939122f832f0a828d5aa46c --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.13233131077367935, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.020754119796718328}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.04657043035383532, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011239018871889047}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.22310195073143146, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0036710595238463504}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.072229178898602, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001543092956764265}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.019500049975035447, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000655019928228548}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.08944246985230496, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0024649243606626467}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03014855551830984, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009380925507866237}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.045134841152731975, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010504863598865273}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2167111755014583, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0035124177345890236}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07016923751429241, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001463021965933563}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.044868836291338345, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0010785128355174395}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.2140025608028596, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0035049787217478366}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.06958672859211196, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014866571421992408}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_3.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..5c7f0ab90e4d9e3c7eab29a492a0199d3063869c --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.13947101812447582, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.018315087240793882}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.04628425620736115, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011906227868207762}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.22162628233257115, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003699603412762138}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07142372334512734, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014967716009997196}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.01900801974071657, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006225076787649264}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.09082883316536437, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0024924784743755716}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.029728278072032675, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0009115508980129494}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.0449857247777386, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011445457261440822}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.2157765528802989, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003568608064613849}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.06951257011480294, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014353167256811218}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.044538020363677544, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001153245572021182}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.21261549130389004, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.003555356149389382}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.06870694683333799, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014430132276342272}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_4.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..0f809209c581bd148b465e56382b476297c152d5 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.1527953877181468, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.019768736235517084}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.049405661249387195, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015067654970712654}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.21945812028893277, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0038030401302535473}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.07322467651453785, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015776011765047185}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.019507736069696385, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006635980926413334}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.0898799136549315, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0025235606959418174}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.030133163651116946, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000946448138374341}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.04748079297445503, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014455281648470706}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21167906766927475, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.003606124529568187}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07052992790200849, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0014961569169770967}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.047420187913872876, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.00146239713460076}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.21044965879733385, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0036184924163132487}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07026457828396626, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015085357222444655}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_5.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..ecff1349797766e9dc915341e1b26fe6a9f49dc2 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "bleu": 0.1543511832741751, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.016910234829161125}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_precision": 0.04897180945064415, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0011879866388460427}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_recall": 0.22641617291173682, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0038708800569893387}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge1_fmeasure": 0.0752860133282323, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016079889297565108}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_precision": 0.01979288435902756, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006930205249820375}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_recall": 0.0924249139436825, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.002601472833120827}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rouge2_fmeasure": 0.03067588405791704, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000990869627019482}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_precision": 0.04693858845286846, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011062268749065108}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_recall": 0.21842890172009044, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. 
To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0037028076196281346}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeL_fmeasure": 0.07241506380370862, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0015195422798472567}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_precision": 0.04684600487139167, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011143004580429542}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_recall": 0.21758509824855146, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0037100549914324075}, {"task_name": "GEM/web_nlg_en", "prompt_name": "PALM_prompt", "rougeLsum_fmeasure": 0.07218362769944814, "fixed_answer_choice_list": null, "dataset_path": "GEM/web_nlg", "dataset_name": "en", "subset": null, "prompt_id": "3e41305c-5461-4cf3-853d-8a6fb5747623", "prompt_jinja": "I will verbalize an abstract representation of a sentence in natural language. To do so, I will first show the representation and then the natural language. 
The text needs to include all of the information in the representation.\n\n{{input | join(\", \")}} {% for i in references %}\n ||| {{ i }} \n{% endfor %}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00152233932545025}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_0.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..de5d9cb9ddcf4468ea0aceb2d332968eb4378c5b --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.08339163245487013, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015701734542072692}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.1339614422246552, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0022063646191760266}, {"task_name": 
"GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.09500586912764274, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001581030847773693}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.00971079106803878, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00043115845148404607}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.016507607826228517, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0008300127388871056}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.01120473259619999, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0004805228056876556}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0740161323947552, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013030587331252655}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.12102487819642505, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019286216477420524}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08483002284596712, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0013213133984729892}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.07840425802379598, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014506133818074898}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.1266093161972992, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002050758180318845}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.08946714159842706, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": 
"en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014568402519778134}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.5006634552225351, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.044910978275131574}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_1.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..3867f1ce6c822f34b203658158c414fbbe669f07 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.11154914738842088, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001565926577691468}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": 
"tldr_en", "rouge1_recall": 0.17837787236817565, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0022939374135331763}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.12650922949233337, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0015776384826867381}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.012303282741217402, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0004977312922919948}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.02120986919186197, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.000961024479567027}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.014185318776824654, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, 
"comment": "", "rouge2_fmeasure_stderr": 0.0005539952463359423}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0819807938213227, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010365171738526949}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1347716870219141, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0016852072346891771}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.093445358194514, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0010265579811074785}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.10520376697434682, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014560555302690096}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.16883413702342775, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": 
"d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0021514249258459766}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.11937816894449679, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0014610839777197875}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.7308707235181653, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03624491848839971}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_2.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..eb07e5b34a61fc009e0bfd6a499cc9ff48a1a773 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 
0.10718496730177166, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0015158574960997943}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.15820343048358307, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0021422121164417526}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.11493621326625231, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0014165434365593652}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.009485434740860894, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0004499838163545394}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.016060981276017856, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", 
"rouge2_recall_stderr": 0.0008780026374129505}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.010491267852697777, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00045750208006263256}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.0828490160772031, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0010992324075365162}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.1250415287204922, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0016858824852820932}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08905184570291635, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0010026383889898429}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.10147571850767263, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": 
"d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0014253846921573384}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.1496723021555078, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.001999788388601054}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.10867696221681347, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0013166067421143065}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.574275497631973, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03208608000882317}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git 
a/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_3.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..a482b09f6700e671a4d447d0011320f97f5dd2ef --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.10117615812697052, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.002001765686505826}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.13395309268486705, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0024326536614082067}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.09863778696344866, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0016224746347034249}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.012462726953274111, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006845656283001023}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.01804598548128886, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0009715950225847244}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.011959535486630863, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005266925357857687}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.08228663390439268, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0016068246788563708}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.11039159364883624, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002002607001174257}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.08014064782546042, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": 
null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001255446986058769}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.09508913606615355, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0018770902467542421}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.12595075159640753, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002280111696329522}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.09253590049962912, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015061332916376507}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 0.8154178741377799, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.0711428685141705}], "config": {"model": "hf-causal", "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_4.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 0000000000000000000000000000000000000000..a13cd1e2665d542d88994469274ad16c8ae8f3d5 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.03898357731611677, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0017825874804167753}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.04532969573622113, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0018850566541848744}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.033416501266504285, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| 
{{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0012811365874590627}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.00575301955053695, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006865146047287286}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.006448105016916232, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0006394944930278906}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.004346926730043043, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0003414378126847235}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.033141939533690275, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.001553685707066213}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.03825941205326226, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", 
"subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0015857656133650632}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.028002192357454752, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0010559374376441348}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.0365582282355839, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016924791527445587}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.0421362580870465, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0017481195713651538}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 0.03105060959588777, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0011822792638006734}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", 
"bleu": 0.1304924586039303, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.015544038110929517}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_5.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..770357631a0dd9780b5aae9b07da93c4af9a8500 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_precision": 0.006680699820051665, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0007831449627821159}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_recall": 0.008032946692221482, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": 
true, "comment": "", "rouge1_recall_stderr": 0.0009295621673176173}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge1_fmeasure": 0.005735850006894939, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0006087235917222238}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_precision": 0.0009402204716120495, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0002685696838656648}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_recall": 0.0014701868415047995, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.00040920046688979096}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rouge2_fmeasure": 0.0008163688826715328, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00016062789042140316}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_precision": 0.005734541876408672, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": 
"d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.000687876116925154}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_recall": 0.006799717290334942, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0008033313544632205}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeL_fmeasure": 0.004789842927248061, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0004998401506731663}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_precision": 0.006248631750092553, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0007392179272479729}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_recall": 0.0075027920259527195, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0008756411251168826}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "rougeLsum_fmeasure": 
0.005327313824531753, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005639936965640789}, {"task_name": "GEM/wiki_lingua_en", "prompt_name": "tldr_en", "bleu": 1.7187355603562817e-08, "fixed_answer_choice_list": null, "dataset_path": "GEM/wiki_lingua", "dataset_name": "en", "subset": null, "prompt_id": "d3c5baa3-5e37-46f8-b1b2-5b834181c9da", "prompt_jinja": "{{source}}\n\nTL;DR in English: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 5.0484493431935596e-08}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_0.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..1ecb48edfe0e8ce93f8d0ab8d164ea3c7e527b9c --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 0.01680243748394886, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a 
restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.002698090075425261}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.6433261276659951, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0029218316233115605}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.08476188184739113, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0006754266013801586}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.14662019660341005, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0009853032775837527}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.46212847085152825, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.003642947152471072}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.04262636012387003, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0004484611360318446}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.07673989711292045, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0007457397869014458}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.6426733498882173, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0029154716414218082}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.08466498168636054, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0006738662721002983}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.14645605673737266, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0009826864611526293}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.6423963007669804, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0029201140300377524}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.08440337916364413, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0006553411263838314}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.14626033409401368, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0009813673062461156}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_1.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..f26250a8ddf9ae97ffd5a80cf0868c3e4873b8d7 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 3.606807825191092, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09591000011528884}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.33812705186617115, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0030059547853716175}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.2656622288074228, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0023779546588631423}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.2785732199964262, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0021267678733130654}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.09524146435617294, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0024246810891708813}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.06840391458761358, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001331519655928045}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.07187122797474006, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013330899113155933}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.2510116567489667, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0026151315715530056}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.19361887293082036, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0018123219187092408}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.20361863606382402, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.001641365236629229}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.28122013134381013, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0028178130895461914}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.2186184944989252, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002073233154182344}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.2296849019393887, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0018950866763813939}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_2.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..2a0fbc78377f03318f371150ba9be0dfa2f4d69c --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 3.992792767047178, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.09725526110232631}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.33608162616426657, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0031714291275481456}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.27361700767297814, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.002511311849708488}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.2819453651202342, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022491484131099567}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.10251377541302573, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002529512493704511}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.07550238911894418, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0013919432182609326}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.07804530186588765, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0013694663864768511}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.2493183914789315, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0027338010573676377}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.1989015651153766, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.00188976140279768}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.20544412900207384, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016999081653822567}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.27995922095515885, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0029605640783204592}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.22532233392524176, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00219041995304143}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.23257355340408511, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0019945764398285565}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_3.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..487f5e246b15ca9267eeff4d719dd57f8f21dfaf --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 4.542589846928786, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.11022442742688977}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.3532606000909218, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003394671211548118}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.2790053105425374, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0025333819014360984}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.2898476701675653, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0022762938721868015}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.11778544433669538, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.002806905841317741}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.08272326311443293, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015245876152504886}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.08636994486944548, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0014768318615889233}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.26309518558047507, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.003000419731819453}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.20306642059186092, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0019374318026212406}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.21152763709695743, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017565192258644194}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.29542533245841884, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.003198053867673902}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.2301634245125563, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0022249235590292974}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.23961551003163925, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002030612765036443}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_4.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..2e11e66f4becfc92a8c3b94684ec788139cc41ea --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 5.114285558003046, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.20109382641670068}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.3733235088964239, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003609553087130593}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.2877821560581824, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026362691288050522}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.29986918539212454, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002342588306645915}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.13637647673895614, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.003086580748456374}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.09285242050197177, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016087031680355136}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.09735409373330947, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0015722322876525361}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.28242300696080636, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0032929610979490643}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.21130880883099537, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002044919631479113}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.22122022721608414, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018554112374430712}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3151065011267256, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0034586989073176035}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.23813807804934384, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.00230934645012491}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.24924810659702057, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.002104346485260036}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_5.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 0000000000000000000000000000000000000000..e5ff25b3d5fd4a0bd9f2ee72aca9fbd40faecb63 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "bleu": 5.632987033503432, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12270638039694408}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_precision": 0.39620617849070117, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.003692529570425549}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_recall": 0.2988258431534535, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0026154059096158704}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge1_fmeasure": 0.313198984692364, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002298889735236565}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_precision": 0.15550185624641005, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0033115072925049963}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_recall": 0.10296881934063905, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0016637455699906201}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rouge2_fmeasure": 0.10879095982587014, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.001637357575795527}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_precision": 0.30318450489358567, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0034204881401338435}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_recall": 0.2224452039754921, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002075351954997885}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeL_fmeasure": 0.23395898887358021, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0018573982535088618}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_precision": 0.3365106811613415, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0035625351600787124}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_recall": 0.249899897084878, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. ||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0023428692540716743}, {"task_name": "e2e_nlg_cleaned", "prompt_name": "generate_text_restaurant", "rougeLsum_fmeasure": 0.2625181349452857, "fixed_answer_choice_list": null, "dataset_path": "e2e_nlg_cleaned", "dataset_name": null, "subset": null, "prompt_id": "1acabbc3-c9b9-4624-a684-29faeccff46f", "prompt_jinja": "Given the following data about a restaurant:\n{% for feature in meaning_representation.split(\"]\") %} {% set key = feature.split(\"[\")[0].replace(\",\",\"\") %} {% set value = feature.replace(\",\",\"\").replace(key+\"[\", '''') %}\n{% if value != \"\" %} {{key}} : {{value}} {% endif %}\n{%- endfor %}\nGenerate some text about this restaurant. 
||| {{human_reference}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0021061093071915913}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_0.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..82a53b6c26a048e8526800df5cacc4b75d4b08e2 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.09771365246979172, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016248674407969557}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.23650282724178925, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003565646784197251}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.1352322775766746, "fixed_answer_choice_list": null, "dataset_path": 
"GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.002093147476186513}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.01591614621801506, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0007733721524986365}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.039557433327920236, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0018240383271500894}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.022118301154473812, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0010229294547598842}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.08548043307254967, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0013203675221128862}, {"task_name": "gem_xsum", "prompt_name": 
"article_DOC_summary", "rougeL_recall": 0.20805176836389447, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002981835490749998}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.11848768408472485, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017014410425288265}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0757578176731396, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0012941776669993434}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.18487501359322192, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002939294199863662}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.10492817925826417, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, 
"comment": "", "rougeLsum_fmeasure_stderr": 0.0016648037816994427}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.7983779878069583, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08407645621525846}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 0, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_1.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..685e13e99c5e77b29d524e919d08d9ca58a20e0d --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08873212210300226, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.001484865853449878}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.21862820911914055, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0033350892165413724}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.12464195903723292, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.001986282187084721}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.011323100811233888, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006143156724747702}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.028138369341575313, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0014918182252256981}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.015956828653791853, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008556268371531968}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07615953328644745, 
"fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0012055159065040108}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.189233538409857, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002774433671102753}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.10721848422534487, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016173733464716808}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06837611993312843, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011392914654355624}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.17039042638815952, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.002688202908025971}, 
{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09633539960893688, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015434800775443016}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.5795358900599222, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.08200609097903194}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 1, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_2.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..34c5fa12c9de462dd7891482a38030ad9725a381 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08540093432806928, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: 
{{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0014365714497008954}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.21231148858831494, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003293271964879966}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.12031552187548465, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0019401023596279901}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.011502007349166413, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000604120354786913}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.029234857968137292, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0015211619077998635}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.01630470699791051, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, 
"subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.000846390835932116}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07491986234635176, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0011927622285722677}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.18731419594542628, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002781788617302899}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.10569353247950078, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016112264732663693}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06642701438881495, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0011073815436868529}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 
0.1667781953902699, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0026526498512359333}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0938181520328098, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0015105722754868353}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.615246044289447, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.07561222529856154}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 2, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_3.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..4468d870b5e4177dd0ee597afbc0d91f45b05e4c --- /dev/null +++ 
b/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.08494601064475496, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0016533379054312243}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.20388079080231894, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.003487003634112906}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.11681165330668473, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.00203678054821043}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.011605657050319455, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.0006267783397381562}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.02868802660607133, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.001500648134927792}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.016120835877292777, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0008399416007048803}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.07512470071697985, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0014388196947271717}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.18066276970166414, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.002953620796499381}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.10324876660992884, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0017215298385734191}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.06684741877574751, 
"fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.001337910085082421}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.16093917951269368, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0027217417123091563}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.09173565213355828, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.00156573820081352}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.5363135385730404, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.03707981819107192}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 3, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline 
at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_4.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..4db48274b7ee51e99811bd0a04c713a4656f8dc2 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.029193163671091177, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0019225623982780473}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.05165986432248715, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0029726627100479727}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.03321457300244443, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0018949279717989612}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0038735350521400523, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: 
{{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.000446624939881871}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.008240428320551699, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0008999130022158124}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.00491498426582168, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.0005142132536709081}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.02589016692167336, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0017307301951775165}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0461329470241897, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0026433535996646575}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.02937853466978305, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, 
"subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0016519082698269935}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.024004200518679002, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0016916307100890552}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.041413519810500114, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0024218231110929894}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.026657181188817783, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.001539518289253859}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 0.36380702453237423, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 0.12315867751461915}], "config": {"model": "hf-causal", "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 4, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_5.json b/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..466e09ac1e2f14d41f7fafa16f28c93856b97b78 --- /dev/null +++ b/421m3b93b9/evaluation/generation/agg.421m3b93b9_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1 @@ +{"results": [{"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_precision": 0.003092116221168736, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_precision_stderr": 0.0008391229048273843}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_recall": 0.002619968676809126, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge1_recall_stderr": 0.0006825595822719792}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge1_fmeasure": 0.00277644807976023, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", 
"prompt_original_task": true, "comment": "", "rouge1_fmeasure_stderr": 0.0007325746737680531}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_precision": 0.0005150702362134104, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_precision_stderr": 0.00019157235260488142}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_recall": 0.0004353324872192797, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_recall_stderr": 0.0001685056136899175}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rouge2_fmeasure": 0.0004684279631865969, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rouge2_fmeasure_stderr": 0.00017706692107499406}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_precision": 0.002222977376680864, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_precision_stderr": 0.0006043308708610969}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_recall": 0.0019631674265652684, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": 
"a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_recall_stderr": 0.0005268242286389718}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeL_fmeasure": 0.0020346467233967516, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeL_fmeasure_stderr": 0.0005427546343352937}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_precision": 0.0023634085002301602, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_precision_stderr": 0.0006503711974140844}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_recall": 0.0020809098481805793, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_recall_stderr": 0.0005629942602894516}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "rougeLsum_fmeasure": 0.0021622748247738274, "fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "rougeLsum_fmeasure_stderr": 0.0005841682012876381}, {"task_name": "gem_xsum", "prompt_name": "article_DOC_summary", "bleu": 4.115418103698432e-38, 
"fixed_answer_choice_list": null, "dataset_path": "GEM/xsum", "dataset_name": null, "subset": "", "prompt_id": "a8d4ecfa-c944-44d5-878c-04fd5db59e64", "prompt_jinja": "Article: {{document}}\n\nSummary: ||| {{target}}", "prompt_original_task": true, "comment": "", "bleu_stderr": 6.296520100128421e-34}], "config": {"model": "hf-causal", "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", "task_args": "", "num_fewshot": 5, "batch_size": 16, "device": "cuda", "use_cache": false, "limit": 3000, "bootstrap_iters": 10, "seed": 1234}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_0.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..30d354a1e4b36cfdda4a70a03acc65a83b0f2cd2 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6d245107cf239bcd36652d6506ca2684cdfac5b4b17011f6ce4c27c383d0db1 +size 4121243 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_1.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e152360f52f8d1bbd4a249bec81316be5c06e898 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c459eb360f795c4ad7be788c468f8859f94700e582829027610055b6cc4cb8bb +size 5142591 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_2.jsonl 
b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..587fe957b8657c210ba51803b8e5206fc537f508 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12724d4edf6555b7e1fe3268a213a4627f3ddefaf32b164369d57d83b995216a +size 6068853 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_3.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7538b4cd5b37ae9929982a41bb76c5a7e2cb3fe7 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55db3787489726bf06ce0008d534b83662397bd924fe4fbfbb55d1235b93a568 +size 6952110 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_4.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f115ae7c267d7b471dea946314742a03eb186201 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f317d761a05bba2c86472a97de5b037050e6cf74515b577fb0e380ead112565 +size 7829328 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_5.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..546867e72aaafb470f7b686672c8eb29f46b64cd --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-web_nlg_en_PALM_prompt_5.jsonl @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e31825471f5bb5286361e7cb32a4822008593602893b777ddd01e696bbcfa5b5 +size 8702559 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_0.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f3ef1f470ca145d57bf570fdafb8b61dbf90a0a0 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69f0170b8343f090dcd2deaefc466f6ab7cbb453eaf7bcb20bb1b1e0788ffd5a +size 7659406 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_1.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7377facc24b429cde57282587d2a82e0ef509424 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6d85317dd6516133cd6a937e47d0e1d36fbbb4bbf8cc52dc8f37d9dfb1c8eb4 +size 13335168 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_2.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..84f26ef32f97522bab62730b5fb769e84398b821 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bce1c37879e39378ae3311a92d76cf2bbc52eb9983f2675510c90a9db584a8ec +size 18886765 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_3.jsonl 
b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..185100a1c58445e3da45a3aa08f5b8c639201efe --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066247645ea174f05e11985a4b4fa4bd45644d16303d0601b6385bbeae3e844c +size 24281394 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_4.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..77cd6e79f63a7b8b7f87ed68991e058eb22b0cb4 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33a036057b1477e2b2b83281e1c24542b989b1bb24807d8942363951e8d562ea +size 29443383 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_5.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..51bef55601bf860a8da4b0d4726928d36db23923 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_GEM-wiki_lingua_en_tldr_en_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00a9341679247db71aa4fe6f423c0ee44d4776b25463484b406b9b4b8c1adef3 +size 34794861 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..62000bc236c48658c686897528c1e3c293649ebf --- /dev/null +++ 
b/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab8753da0fd6f6bdc90c908d448dfb1e0e60440df1092b41bef8c9e3e1846234 +size 3678981 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..27dbcae7ef571d23bac3e3f0c296041c76885e8c --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:014ea4d2c14941065196fbbf7f8fc4dcb862ed2668f42f3a05bb01408a4fce69 +size 5027097 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..23167ac5129dcab8057370be5ec2cf61e25a59e5 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da32398dd2e83896f85fcf09321b216b4e36a249ee7e891405fb742fd8a134b +size 6121566 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..16e0102485f466c268ce53ea4ac0ac80596d3683 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:69c202c06ad1d96690793f34ce83744f2b745699b2f5267ed9bdd359ca4d6d84 +size 7201940 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..302d8e3352e782d1cbe5a0416e0e83702391fbf0 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:960068ccc434a844205ef4bf07ba9b90d645c8e4ab4ee587999ad914fe9813ed +size 8277849 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..f1664f2211ab5d0b52b7f03829e1f0c3278f3d03 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2378b8a48c5ada7b8d05c33e971690a5e00a13fc656738d5cb07e174b522fdb4 +size 9363952 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_0.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_0.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..10eed191c473ab6f86b8e6c46cf2b14bc2be7cc2 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_0.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd70cbc295c1085f4e0d3bec47875ed6a77f3a90323e3b8814c1079f359979c6 +size 2834558 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_1.jsonl 
b/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_1.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..397475c2fa319988a63b80225178bcfb090c9f3f --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_1.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ce34e984a2ed2f297bc11449565a2d7929b92f485ef8f4d55bde7cded363da +size 5109244 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_2.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_2.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7d72c5632a04d6fc7a91271a9da54a397a0e0bcb --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_2.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c449037ca49bdea8fce4ba497f4e2aef8d284983e025f109d19501da7d72170 +size 7381180 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_3.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_3.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7e0ff02765c9c7e8ab6ed4d0922b7930de8eaf58 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_3.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f637bff1820ecb9fd8d579652da1df451ab0cc8727343ce691b640c579398e51 +size 9649674 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_4.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_4.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..0e85aa9a2b3d09b11f2d6ed51343c5fd24cc2272 --- /dev/null +++ 
b/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_4.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90fa8219bbd156402ed90ecee0d8811d1bdba4c63a7d65eeff82870994ee7454 +size 11674174 diff --git a/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_5.jsonl b/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_5.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..79644e55335e8855f305fb9bf2afedbf1abe1a37 --- /dev/null +++ b/421m3b93b9/evaluation/generation/examples.421m3b93b9_gem_xsum_article_DOC_summary_5.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1389674ad6cbff98b112e16c7f03524f9aea61e71932c5ac3efc60d8120f32a +size 13897536 diff --git a/421m3b93b9/evaluation/generation/merged.csv b/421m3b93b9/evaluation/generation/merged.csv new file mode 100644 index 0000000000000000000000000000000000000000..7d6050467eb62605ade50538b0afa1e81c5ad489 --- /dev/null +++ b/421m3b93b9/evaluation/generation/merged.csv @@ -0,0 +1,53 @@ +dataset,fewshots,prompt,metric,value +e2e_nlg_cleaned,0,generate_text_restaurant,rouge2_fmeasure,0.07673989711292045 +e2e_nlg_cleaned,0,median,rouge2_fmeasure,0.07673989711292045 +e2e_nlg_cleaned,1,generate_text_restaurant,rouge2_fmeasure,0.07187122797474006 +e2e_nlg_cleaned,1,median,rouge2_fmeasure,0.07187122797474006 +e2e_nlg_cleaned,2,generate_text_restaurant,rouge2_fmeasure,0.07804530186588765 +e2e_nlg_cleaned,2,median,rouge2_fmeasure,0.07804530186588765 +e2e_nlg_cleaned,3,generate_text_restaurant,rouge2_fmeasure,0.08636994486944548 +e2e_nlg_cleaned,3,median,rouge2_fmeasure,0.08636994486944548 +e2e_nlg_cleaned,4,generate_text_restaurant,rouge2_fmeasure,0.09735409373330947 +e2e_nlg_cleaned,4,median,rouge2_fmeasure,0.09735409373330947 +e2e_nlg_cleaned,5,generate_text_restaurant,rouge2_fmeasure,0.10879095982587014 +e2e_nlg_cleaned,5,median,rouge2_fmeasure,0.10879095982587014 
+e2e_nlg_cleaned,5,average,multiple,0.08652857089702888 +gem_xsum,0,article_DOC_summary,rouge2_fmeasure,0.022118301154473812 +gem_xsum,0,median,rouge2_fmeasure,0.022118301154473812 +gem_xsum,1,article_DOC_summary,rouge2_fmeasure,0.015956828653791853 +gem_xsum,1,median,rouge2_fmeasure,0.015956828653791853 +gem_xsum,2,article_DOC_summary,rouge2_fmeasure,0.01630470699791051 +gem_xsum,2,median,rouge2_fmeasure,0.01630470699791051 +gem_xsum,3,article_DOC_summary,rouge2_fmeasure,0.016120835877292777 +gem_xsum,3,median,rouge2_fmeasure,0.016120835877292777 +gem_xsum,4,article_DOC_summary,rouge2_fmeasure,0.00491498426582168 +gem_xsum,4,median,rouge2_fmeasure,0.00491498426582168 +gem_xsum,5,article_DOC_summary,rouge2_fmeasure,0.0004684279631865969 +gem_xsum,5,median,rouge2_fmeasure,0.0004684279631865969 +gem_xsum,5,average,multiple,0.012647347485412871 +web_nlg_en,0,PALM_prompt,rouge2_fmeasure,0.03937584218396654 +web_nlg_en,0,median,rouge2_fmeasure,0.03937584218396654 +web_nlg_en,1,PALM_prompt,rouge2_fmeasure,0.03540060823925364 +web_nlg_en,1,median,rouge2_fmeasure,0.03540060823925364 +web_nlg_en,2,PALM_prompt,rouge2_fmeasure,0.03014855551830984 +web_nlg_en,2,median,rouge2_fmeasure,0.03014855551830984 +web_nlg_en,3,PALM_prompt,rouge2_fmeasure,0.029728278072032675 +web_nlg_en,3,median,rouge2_fmeasure,0.029728278072032675 +web_nlg_en,4,PALM_prompt,rouge2_fmeasure,0.030133163651116946 +web_nlg_en,4,median,rouge2_fmeasure,0.030133163651116946 +web_nlg_en,5,PALM_prompt,rouge2_fmeasure,0.03067588405791704 +web_nlg_en,5,median,rouge2_fmeasure,0.03067588405791704 +web_nlg_en,5,average,multiple,0.03257705528709945 +wiki_lingua_en,0,tldr_en,rouge2_fmeasure,0.01120473259619999 +wiki_lingua_en,0,median,rouge2_fmeasure,0.01120473259619999 +wiki_lingua_en,1,tldr_en,rouge2_fmeasure,0.014185318776824654 +wiki_lingua_en,1,median,rouge2_fmeasure,0.014185318776824654 +wiki_lingua_en,2,tldr_en,rouge2_fmeasure,0.010491267852697777 +wiki_lingua_en,2,median,rouge2_fmeasure,0.010491267852697777 
+wiki_lingua_en,3,tldr_en,rouge2_fmeasure,0.011959535486630863 +wiki_lingua_en,3,median,rouge2_fmeasure,0.011959535486630863 +wiki_lingua_en,4,tldr_en,rouge2_fmeasure,0.004346926730043043 +wiki_lingua_en,4,median,rouge2_fmeasure,0.004346926730043043 +wiki_lingua_en,5,tldr_en,rouge2_fmeasure,0.0008163688826715328 +wiki_lingua_en,5,median,rouge2_fmeasure,0.0008163688826715328 +wiki_lingua_en,5,average,multiple,0.008834025054177977 diff --git a/421m3b93b9/evaluation/generation/merged.json b/421m3b93b9/evaluation/generation/merged.json new file mode 100644 index 0000000000000000000000000000000000000000..e1f147790108f415b3ecb5f77d18630e2966e8da --- /dev/null +++ b/421m3b93b9/evaluation/generation/merged.json @@ -0,0 +1 @@ +{"GEM/web_nlg_en": {"0": {"PALM_prompt": {"bleu": 0.2514375429625041, "bleu_stderr": 0.021998769689162215, "rouge1_fmeasure": 0.08324727569094531, "rouge1_fmeasure_stderr": 0.0016897087122561929, "rouge1_precision": 0.05458209453689849, "rouge1_precision_stderr": 0.0013170294944666171, "rouge1_recall": 0.2336973485576593, "rouge1_recall_stderr": 0.0040096086479517205, "rouge2_fmeasure": 0.03937584218396654, "rouge2_fmeasure_stderr": 0.0010694968357709916, "rouge2_precision": 0.02537832167404535, "rouge2_precision_stderr": 0.0007376731654434439, "rouge2_recall": 0.11385934611253304, "rouge2_recall_stderr": 0.0027982806538345414, "rougeL_fmeasure": 0.08142923304713769, "rougeL_fmeasure_stderr": 0.0016238266133227941, "rougeL_precision": 0.053290497762677254, "rougeL_precision_stderr": 0.0012667710338326798, "rougeL_recall": 0.23006581827203837, "rougeL_recall_stderr": 0.003953117095365217, "rougeLsum_fmeasure": 0.07983781819186882, "rougeLsum_fmeasure_stderr": 0.0015754149459580782, "rougeLsum_precision": 0.05232107137991547, "rougeLsum_precision_stderr": 0.0012444155901422537, "rougeLsum_recall": 0.22537101759593997, "rougeLsum_recall_stderr": 0.0037776155749755847}}, "1": {"PALM_prompt": {"bleu": 0.21769111537226385, "bleu_stderr": 
0.03137697858822512, "rouge1_fmeasure": 0.07951030843670538, "rouge1_fmeasure_stderr": 0.001723017279624104, "rouge1_precision": 0.052001919875535305, "rouge1_precision_stderr": 0.001321082636852792, "rouge1_recall": 0.23988446859803386, "rouge1_recall_stderr": 0.004156869077188399, "rouge2_fmeasure": 0.03540060823925364, "rouge2_fmeasure_stderr": 0.0010747042112953737, "rouge2_precision": 0.023111231906370898, "rouge2_precision_stderr": 0.0007776060967722213, "rouge2_recall": 0.10329178271134923, "rouge2_recall_stderr": 0.002685870512933402, "rougeL_fmeasure": 0.07686062327992164, "rougeL_fmeasure_stderr": 0.0016204699207326648, "rougeL_precision": 0.050152137757026496, "rougeL_precision_stderr": 0.0012390593186375923, "rougeL_recall": 0.23295789943347153, "rougeL_recall_stderr": 0.003998023116442341, "rougeLsum_fmeasure": 0.07589271888993057, "rougeLsum_fmeasure_stderr": 0.0016212233683597064, "rougeLsum_precision": 0.049657111072679484, "rougeLsum_precision_stderr": 0.001251376403565292, "rougeLsum_recall": 0.22830496660590702, "rougeLsum_recall_stderr": 0.003848392166869495}}, "2": {"PALM_prompt": {"bleu": 0.13233131077367935, "bleu_stderr": 0.020754119796718328, "rouge1_fmeasure": 0.072229178898602, "rouge1_fmeasure_stderr": 0.001543092956764265, "rouge1_precision": 0.04657043035383532, "rouge1_precision_stderr": 0.0011239018871889047, "rouge1_recall": 0.22310195073143146, "rouge1_recall_stderr": 0.0036710595238463504, "rouge2_fmeasure": 0.03014855551830984, "rouge2_fmeasure_stderr": 0.0009380925507866237, "rouge2_precision": 0.019500049975035447, "rouge2_precision_stderr": 0.000655019928228548, "rouge2_recall": 0.08944246985230496, "rouge2_recall_stderr": 0.0024649243606626467, "rougeL_fmeasure": 0.07016923751429241, "rougeL_fmeasure_stderr": 0.001463021965933563, "rougeL_precision": 0.045134841152731975, "rougeL_precision_stderr": 0.0010504863598865273, "rougeL_recall": 0.2167111755014583, "rougeL_recall_stderr": 0.0035124177345890236, "rougeLsum_fmeasure": 
0.06958672859211196, "rougeLsum_fmeasure_stderr": 0.0014866571421992408, "rougeLsum_precision": 0.044868836291338345, "rougeLsum_precision_stderr": 0.0010785128355174395, "rougeLsum_recall": 0.2140025608028596, "rougeLsum_recall_stderr": 0.0035049787217478366}}, "3": {"PALM_prompt": {"bleu": 0.13947101812447582, "bleu_stderr": 0.018315087240793882, "rouge1_fmeasure": 0.07142372334512734, "rouge1_fmeasure_stderr": 0.0014967716009997196, "rouge1_precision": 0.04628425620736115, "rouge1_precision_stderr": 0.0011906227868207762, "rouge1_recall": 0.22162628233257115, "rouge1_recall_stderr": 0.003699603412762138, "rouge2_fmeasure": 0.029728278072032675, "rouge2_fmeasure_stderr": 0.0009115508980129494, "rouge2_precision": 0.01900801974071657, "rouge2_precision_stderr": 0.0006225076787649264, "rouge2_recall": 0.09082883316536437, "rouge2_recall_stderr": 0.0024924784743755716, "rougeL_fmeasure": 0.06951257011480294, "rougeL_fmeasure_stderr": 0.0014353167256811218, "rougeL_precision": 0.0449857247777386, "rougeL_precision_stderr": 0.0011445457261440822, "rougeL_recall": 0.2157765528802989, "rougeL_recall_stderr": 0.003568608064613849, "rougeLsum_fmeasure": 0.06870694683333799, "rougeLsum_fmeasure_stderr": 0.0014430132276342272, "rougeLsum_precision": 0.044538020363677544, "rougeLsum_precision_stderr": 0.001153245572021182, "rougeLsum_recall": 0.21261549130389004, "rougeLsum_recall_stderr": 0.003555356149389382}}, "4": {"PALM_prompt": {"bleu": 0.1527953877181468, "bleu_stderr": 0.019768736235517084, "rouge1_fmeasure": 0.07322467651453785, "rouge1_fmeasure_stderr": 0.0015776011765047185, "rouge1_precision": 0.049405661249387195, "rouge1_precision_stderr": 0.0015067654970712654, "rouge1_recall": 0.21945812028893277, "rouge1_recall_stderr": 0.0038030401302535473, "rouge2_fmeasure": 0.030133163651116946, "rouge2_fmeasure_stderr": 0.000946448138374341, "rouge2_precision": 0.019507736069696385, "rouge2_precision_stderr": 0.0006635980926413334, "rouge2_recall": 0.0898799136549315, 
"rouge2_recall_stderr": 0.0025235606959418174, "rougeL_fmeasure": 0.07052992790200849, "rougeL_fmeasure_stderr": 0.0014961569169770967, "rougeL_precision": 0.04748079297445503, "rougeL_precision_stderr": 0.0014455281648470706, "rougeL_recall": 0.21167906766927475, "rougeL_recall_stderr": 0.003606124529568187, "rougeLsum_fmeasure": 0.07026457828396626, "rougeLsum_fmeasure_stderr": 0.0015085357222444655, "rougeLsum_precision": 0.047420187913872876, "rougeLsum_precision_stderr": 0.00146239713460076, "rougeLsum_recall": 0.21044965879733385, "rougeLsum_recall_stderr": 0.0036184924163132487}}, "5": {"PALM_prompt": {"bleu": 0.1543511832741751, "bleu_stderr": 0.016910234829161125, "rouge1_fmeasure": 0.0752860133282323, "rouge1_fmeasure_stderr": 0.0016079889297565108, "rouge1_precision": 0.04897180945064415, "rouge1_precision_stderr": 0.0011879866388460427, "rouge1_recall": 0.22641617291173682, "rouge1_recall_stderr": 0.0038708800569893387, "rouge2_fmeasure": 0.03067588405791704, "rouge2_fmeasure_stderr": 0.000990869627019482, "rouge2_precision": 0.01979288435902756, "rouge2_precision_stderr": 0.0006930205249820375, "rouge2_recall": 0.0924249139436825, "rouge2_recall_stderr": 0.002601472833120827, "rougeL_fmeasure": 0.07241506380370862, "rougeL_fmeasure_stderr": 0.0015195422798472567, "rougeL_precision": 0.04693858845286846, "rougeL_precision_stderr": 0.0011062268749065108, "rougeL_recall": 0.21842890172009044, "rougeL_recall_stderr": 0.0037028076196281346, "rougeLsum_fmeasure": 0.07218362769944814, "rougeLsum_fmeasure_stderr": 0.00152233932545025, "rougeLsum_precision": 0.04684600487139167, "rougeLsum_precision_stderr": 0.0011143004580429542, "rougeLsum_recall": 0.21758509824855146, "rougeLsum_recall_stderr": 0.0037100549914324075}}}, "GEM/wiki_lingua_en": {"0": {"tldr_en": {"bleu": 0.5006634552225351, "bleu_stderr": 0.044910978275131574, "rouge1_fmeasure": 0.09500586912764274, "rouge1_fmeasure_stderr": 0.001581030847773693, "rouge1_precision": 0.08339163245487013, 
"rouge1_precision_stderr": 0.0015701734542072692, "rouge1_recall": 0.1339614422246552, "rouge1_recall_stderr": 0.0022063646191760266, "rouge2_fmeasure": 0.01120473259619999, "rouge2_fmeasure_stderr": 0.0004805228056876556, "rouge2_precision": 0.00971079106803878, "rouge2_precision_stderr": 0.00043115845148404607, "rouge2_recall": 0.016507607826228517, "rouge2_recall_stderr": 0.0008300127388871056, "rougeL_fmeasure": 0.08483002284596712, "rougeL_fmeasure_stderr": 0.0013213133984729892, "rougeL_precision": 0.0740161323947552, "rougeL_precision_stderr": 0.0013030587331252655, "rougeL_recall": 0.12102487819642505, "rougeL_recall_stderr": 0.0019286216477420524, "rougeLsum_fmeasure": 0.08946714159842706, "rougeLsum_fmeasure_stderr": 0.0014568402519778134, "rougeLsum_precision": 0.07840425802379598, "rougeLsum_precision_stderr": 0.0014506133818074898, "rougeLsum_recall": 0.1266093161972992, "rougeLsum_recall_stderr": 0.002050758180318845}}, "1": {"tldr_en": {"bleu": 0.7308707235181653, "bleu_stderr": 0.03624491848839971, "rouge1_fmeasure": 0.12650922949233337, "rouge1_fmeasure_stderr": 0.0015776384826867381, "rouge1_precision": 0.11154914738842088, "rouge1_precision_stderr": 0.001565926577691468, "rouge1_recall": 0.17837787236817565, "rouge1_recall_stderr": 0.0022939374135331763, "rouge2_fmeasure": 0.014185318776824654, "rouge2_fmeasure_stderr": 0.0005539952463359423, "rouge2_precision": 0.012303282741217402, "rouge2_precision_stderr": 0.0004977312922919948, "rouge2_recall": 0.02120986919186197, "rouge2_recall_stderr": 0.000961024479567027, "rougeL_fmeasure": 0.093445358194514, "rougeL_fmeasure_stderr": 0.0010265579811074785, "rougeL_precision": 0.0819807938213227, "rougeL_precision_stderr": 0.0010365171738526949, "rougeL_recall": 0.1347716870219141, "rougeL_recall_stderr": 0.0016852072346891771, "rougeLsum_fmeasure": 0.11937816894449679, "rougeLsum_fmeasure_stderr": 0.0014610839777197875, "rougeLsum_precision": 0.10520376697434682, "rougeLsum_precision_stderr": 
0.0014560555302690096, "rougeLsum_recall": 0.16883413702342775, "rougeLsum_recall_stderr": 0.0021514249258459766}}, "2": {"tldr_en": {"bleu": 0.574275497631973, "bleu_stderr": 0.03208608000882317, "rouge1_fmeasure": 0.11493621326625231, "rouge1_fmeasure_stderr": 0.0014165434365593652, "rouge1_precision": 0.10718496730177166, "rouge1_precision_stderr": 0.0015158574960997943, "rouge1_recall": 0.15820343048358307, "rouge1_recall_stderr": 0.0021422121164417526, "rouge2_fmeasure": 0.010491267852697777, "rouge2_fmeasure_stderr": 0.00045750208006263256, "rouge2_precision": 0.009485434740860894, "rouge2_precision_stderr": 0.0004499838163545394, "rouge2_recall": 0.016060981276017856, "rouge2_recall_stderr": 0.0008780026374129505, "rougeL_fmeasure": 0.08905184570291635, "rougeL_fmeasure_stderr": 0.0010026383889898429, "rougeL_precision": 0.0828490160772031, "rougeL_precision_stderr": 0.0010992324075365162, "rougeL_recall": 0.1250415287204922, "rougeL_recall_stderr": 0.0016858824852820932, "rougeLsum_fmeasure": 0.10867696221681347, "rougeLsum_fmeasure_stderr": 0.0013166067421143065, "rougeLsum_precision": 0.10147571850767263, "rougeLsum_precision_stderr": 0.0014253846921573384, "rougeLsum_recall": 0.1496723021555078, "rougeLsum_recall_stderr": 0.001999788388601054}}, "3": {"tldr_en": {"bleu": 0.8154178741377799, "bleu_stderr": 0.0711428685141705, "rouge1_fmeasure": 0.09863778696344866, "rouge1_fmeasure_stderr": 0.0016224746347034249, "rouge1_precision": 0.10117615812697052, "rouge1_precision_stderr": 0.002001765686505826, "rouge1_recall": 0.13395309268486705, "rouge1_recall_stderr": 0.0024326536614082067, "rouge2_fmeasure": 0.011959535486630863, "rouge2_fmeasure_stderr": 0.0005266925357857687, "rouge2_precision": 0.012462726953274111, "rouge2_precision_stderr": 0.0006845656283001023, "rouge2_recall": 0.01804598548128886, "rouge2_recall_stderr": 0.0009715950225847244, "rougeL_fmeasure": 0.08014064782546042, "rougeL_fmeasure_stderr": 0.001255446986058769, "rougeL_precision": 
0.08228663390439268, "rougeL_precision_stderr": 0.0016068246788563708, "rougeL_recall": 0.11039159364883624, "rougeL_recall_stderr": 0.002002607001174257, "rougeLsum_fmeasure": 0.09253590049962912, "rougeLsum_fmeasure_stderr": 0.0015061332916376507, "rougeLsum_precision": 0.09508913606615355, "rougeLsum_precision_stderr": 0.0018770902467542421, "rougeLsum_recall": 0.12595075159640753, "rougeLsum_recall_stderr": 0.002280111696329522}}, "4": {"tldr_en": {"bleu": 0.1304924586039303, "bleu_stderr": 0.015544038110929517, "rouge1_fmeasure": 0.033416501266504285, "rouge1_fmeasure_stderr": 0.0012811365874590627, "rouge1_precision": 0.03898357731611677, "rouge1_precision_stderr": 0.0017825874804167753, "rouge1_recall": 0.04532969573622113, "rouge1_recall_stderr": 0.0018850566541848744, "rouge2_fmeasure": 0.004346926730043043, "rouge2_fmeasure_stderr": 0.0003414378126847235, "rouge2_precision": 0.00575301955053695, "rouge2_precision_stderr": 0.0006865146047287286, "rouge2_recall": 0.006448105016916232, "rouge2_recall_stderr": 0.0006394944930278906, "rougeL_fmeasure": 0.028002192357454752, "rougeL_fmeasure_stderr": 0.0010559374376441348, "rougeL_precision": 0.033141939533690275, "rougeL_precision_stderr": 0.001553685707066213, "rougeL_recall": 0.03825941205326226, "rougeL_recall_stderr": 0.0015857656133650632, "rougeLsum_fmeasure": 0.03105060959588777, "rougeLsum_fmeasure_stderr": 0.0011822792638006734, "rougeLsum_precision": 0.0365582282355839, "rougeLsum_precision_stderr": 0.0016924791527445587, "rougeLsum_recall": 0.0421362580870465, "rougeLsum_recall_stderr": 0.0017481195713651538}}, "5": {"tldr_en": {"bleu": 1.7187355603562817e-08, "bleu_stderr": 5.0484493431935596e-08, "rouge1_fmeasure": 0.005735850006894939, "rouge1_fmeasure_stderr": 0.0006087235917222238, "rouge1_precision": 0.006680699820051665, "rouge1_precision_stderr": 0.0007831449627821159, "rouge1_recall": 0.008032946692221482, "rouge1_recall_stderr": 0.0009295621673176173, "rouge2_fmeasure": 
0.0008163688826715328, "rouge2_fmeasure_stderr": 0.00016062789042140316, "rouge2_precision": 0.0009402204716120495, "rouge2_precision_stderr": 0.0002685696838656648, "rouge2_recall": 0.0014701868415047995, "rouge2_recall_stderr": 0.00040920046688979096, "rougeL_fmeasure": 0.004789842927248061, "rougeL_fmeasure_stderr": 0.0004998401506731663, "rougeL_precision": 0.005734541876408672, "rougeL_precision_stderr": 0.000687876116925154, "rougeL_recall": 0.006799717290334942, "rougeL_recall_stderr": 0.0008033313544632205, "rougeLsum_fmeasure": 0.005327313824531753, "rougeLsum_fmeasure_stderr": 0.0005639936965640789, "rougeLsum_precision": 0.006248631750092553, "rougeLsum_precision_stderr": 0.0007392179272479729, "rougeLsum_recall": 0.0075027920259527195, "rougeLsum_recall_stderr": 0.0008756411251168826}}}, "e2e_nlg_cleaned": {"0": {"generate_text_restaurant": {"bleu": 0.01680243748394886, "bleu_stderr": 0.002698090075425261, "rouge1_fmeasure": 0.14662019660341005, "rouge1_fmeasure_stderr": 0.0009853032775837527, "rouge1_precision": 0.6433261276659951, "rouge1_precision_stderr": 0.0029218316233115605, "rouge1_recall": 0.08476188184739113, "rouge1_recall_stderr": 0.0006754266013801586, "rouge2_fmeasure": 0.07673989711292045, "rouge2_fmeasure_stderr": 0.0007457397869014458, "rouge2_precision": 0.46212847085152825, "rouge2_precision_stderr": 0.003642947152471072, "rouge2_recall": 0.04262636012387003, "rouge2_recall_stderr": 0.0004484611360318446, "rougeL_fmeasure": 0.14645605673737266, "rougeL_fmeasure_stderr": 0.0009826864611526293, "rougeL_precision": 0.6426733498882173, "rougeL_precision_stderr": 0.0029154716414218082, "rougeL_recall": 0.08466498168636054, "rougeL_recall_stderr": 0.0006738662721002983, "rougeLsum_fmeasure": 0.14626033409401368, "rougeLsum_fmeasure_stderr": 0.0009813673062461156, "rougeLsum_precision": 0.6423963007669804, "rougeLsum_precision_stderr": 0.0029201140300377524, "rougeLsum_recall": 0.08440337916364413, "rougeLsum_recall_stderr": 
0.0006553411263838314}}, "1": {"generate_text_restaurant": {"bleu": 3.606807825191092, "bleu_stderr": 0.09591000011528884, "rouge1_fmeasure": 0.2785732199964262, "rouge1_fmeasure_stderr": 0.0021267678733130654, "rouge1_precision": 0.33812705186617115, "rouge1_precision_stderr": 0.0030059547853716175, "rouge1_recall": 0.2656622288074228, "rouge1_recall_stderr": 0.0023779546588631423, "rouge2_fmeasure": 0.07187122797474006, "rouge2_fmeasure_stderr": 0.0013330899113155933, "rouge2_precision": 0.09524146435617294, "rouge2_precision_stderr": 0.0024246810891708813, "rouge2_recall": 0.06840391458761358, "rouge2_recall_stderr": 0.001331519655928045, "rougeL_fmeasure": 0.20361863606382402, "rougeL_fmeasure_stderr": 0.001641365236629229, "rougeL_precision": 0.2510116567489667, "rougeL_precision_stderr": 0.0026151315715530056, "rougeL_recall": 0.19361887293082036, "rougeL_recall_stderr": 0.0018123219187092408, "rougeLsum_fmeasure": 0.2296849019393887, "rougeLsum_fmeasure_stderr": 0.0018950866763813939, "rougeLsum_precision": 0.28122013134381013, "rougeLsum_precision_stderr": 0.0028178130895461914, "rougeLsum_recall": 0.2186184944989252, "rougeLsum_recall_stderr": 0.002073233154182344}}, "2": {"generate_text_restaurant": {"bleu": 3.992792767047178, "bleu_stderr": 0.09725526110232631, "rouge1_fmeasure": 0.2819453651202342, "rouge1_fmeasure_stderr": 0.0022491484131099567, "rouge1_precision": 0.33608162616426657, "rouge1_precision_stderr": 0.0031714291275481456, "rouge1_recall": 0.27361700767297814, "rouge1_recall_stderr": 0.002511311849708488, "rouge2_fmeasure": 0.07804530186588765, "rouge2_fmeasure_stderr": 0.0013694663864768511, "rouge2_precision": 0.10251377541302573, "rouge2_precision_stderr": 0.002529512493704511, "rouge2_recall": 0.07550238911894418, "rouge2_recall_stderr": 0.0013919432182609326, "rougeL_fmeasure": 0.20544412900207384, "rougeL_fmeasure_stderr": 0.0016999081653822567, "rougeL_precision": 0.2493183914789315, "rougeL_precision_stderr": 0.0027338010573676377, 
"rougeL_recall": 0.1989015651153766, "rougeL_recall_stderr": 0.00188976140279768, "rougeLsum_fmeasure": 0.23257355340408511, "rougeLsum_fmeasure_stderr": 0.0019945764398285565, "rougeLsum_precision": 0.27995922095515885, "rougeLsum_precision_stderr": 0.0029605640783204592, "rougeLsum_recall": 0.22532233392524176, "rougeLsum_recall_stderr": 0.00219041995304143}}, "3": {"generate_text_restaurant": {"bleu": 4.542589846928786, "bleu_stderr": 0.11022442742688977, "rouge1_fmeasure": 0.2898476701675653, "rouge1_fmeasure_stderr": 0.0022762938721868015, "rouge1_precision": 0.3532606000909218, "rouge1_precision_stderr": 0.003394671211548118, "rouge1_recall": 0.2790053105425374, "rouge1_recall_stderr": 0.0025333819014360984, "rouge2_fmeasure": 0.08636994486944548, "rouge2_fmeasure_stderr": 0.0014768318615889233, "rouge2_precision": 0.11778544433669538, "rouge2_precision_stderr": 0.002806905841317741, "rouge2_recall": 0.08272326311443293, "rouge2_recall_stderr": 0.0015245876152504886, "rougeL_fmeasure": 0.21152763709695743, "rougeL_fmeasure_stderr": 0.0017565192258644194, "rougeL_precision": 0.26309518558047507, "rougeL_precision_stderr": 0.003000419731819453, "rougeL_recall": 0.20306642059186092, "rougeL_recall_stderr": 0.0019374318026212406, "rougeLsum_fmeasure": 0.23961551003163925, "rougeLsum_fmeasure_stderr": 0.002030612765036443, "rougeLsum_precision": 0.29542533245841884, "rougeLsum_precision_stderr": 0.003198053867673902, "rougeLsum_recall": 0.2301634245125563, "rougeLsum_recall_stderr": 0.0022249235590292974}}, "4": {"generate_text_restaurant": {"bleu": 5.114285558003046, "bleu_stderr": 0.20109382641670068, "rouge1_fmeasure": 0.29986918539212454, "rouge1_fmeasure_stderr": 0.002342588306645915, "rouge1_precision": 0.3733235088964239, "rouge1_precision_stderr": 0.003609553087130593, "rouge1_recall": 0.2877821560581824, "rouge1_recall_stderr": 0.0026362691288050522, "rouge2_fmeasure": 0.09735409373330947, "rouge2_fmeasure_stderr": 0.0015722322876525361, 
"rouge2_precision": 0.13637647673895614, "rouge2_precision_stderr": 0.003086580748456374, "rouge2_recall": 0.09285242050197177, "rouge2_recall_stderr": 0.0016087031680355136, "rougeL_fmeasure": 0.22122022721608414, "rougeL_fmeasure_stderr": 0.0018554112374430712, "rougeL_precision": 0.28242300696080636, "rougeL_precision_stderr": 0.0032929610979490643, "rougeL_recall": 0.21130880883099537, "rougeL_recall_stderr": 0.002044919631479113, "rougeLsum_fmeasure": 0.24924810659702057, "rougeLsum_fmeasure_stderr": 0.002104346485260036, "rougeLsum_precision": 0.3151065011267256, "rougeLsum_precision_stderr": 0.0034586989073176035, "rougeLsum_recall": 0.23813807804934384, "rougeLsum_recall_stderr": 0.00230934645012491}}, "5": {"generate_text_restaurant": {"bleu": 5.632987033503432, "bleu_stderr": 0.12270638039694408, "rouge1_fmeasure": 0.313198984692364, "rouge1_fmeasure_stderr": 0.002298889735236565, "rouge1_precision": 0.39620617849070117, "rouge1_precision_stderr": 0.003692529570425549, "rouge1_recall": 0.2988258431534535, "rouge1_recall_stderr": 0.0026154059096158704, "rouge2_fmeasure": 0.10879095982587014, "rouge2_fmeasure_stderr": 0.001637357575795527, "rouge2_precision": 0.15550185624641005, "rouge2_precision_stderr": 0.0033115072925049963, "rouge2_recall": 0.10296881934063905, "rouge2_recall_stderr": 0.0016637455699906201, "rougeL_fmeasure": 0.23395898887358021, "rougeL_fmeasure_stderr": 0.0018573982535088618, "rougeL_precision": 0.30318450489358567, "rougeL_precision_stderr": 0.0034204881401338435, "rougeL_recall": 0.2224452039754921, "rougeL_recall_stderr": 0.002075351954997885, "rougeLsum_fmeasure": 0.2625181349452857, "rougeLsum_fmeasure_stderr": 0.0021061093071915913, "rougeLsum_precision": 0.3365106811613415, "rougeLsum_precision_stderr": 0.0035625351600787124, "rougeLsum_recall": 0.249899897084878, "rougeLsum_recall_stderr": 0.0023428692540716743}}}, "gem_xsum": {"0": {"article_DOC_summary": {"bleu": 0.7983779878069583, "bleu_stderr": 0.08407645621525846, 
"rouge1_fmeasure": 0.1352322775766746, "rouge1_fmeasure_stderr": 0.002093147476186513, "rouge1_precision": 0.09771365246979172, "rouge1_precision_stderr": 0.0016248674407969557, "rouge1_recall": 0.23650282724178925, "rouge1_recall_stderr": 0.003565646784197251, "rouge2_fmeasure": 0.022118301154473812, "rouge2_fmeasure_stderr": 0.0010229294547598842, "rouge2_precision": 0.01591614621801506, "rouge2_precision_stderr": 0.0007733721524986365, "rouge2_recall": 0.039557433327920236, "rouge2_recall_stderr": 0.0018240383271500894, "rougeL_fmeasure": 0.11848768408472485, "rougeL_fmeasure_stderr": 0.0017014410425288265, "rougeL_precision": 0.08548043307254967, "rougeL_precision_stderr": 0.0013203675221128862, "rougeL_recall": 0.20805176836389447, "rougeL_recall_stderr": 0.002981835490749998, "rougeLsum_fmeasure": 0.10492817925826417, "rougeLsum_fmeasure_stderr": 0.0016648037816994427, "rougeLsum_precision": 0.0757578176731396, "rougeLsum_precision_stderr": 0.0012941776669993434, "rougeLsum_recall": 0.18487501359322192, "rougeLsum_recall_stderr": 0.002939294199863662}}, "1": {"article_DOC_summary": {"bleu": 0.5795358900599222, "bleu_stderr": 0.08200609097903194, "rouge1_fmeasure": 0.12464195903723292, "rouge1_fmeasure_stderr": 0.001986282187084721, "rouge1_precision": 0.08873212210300226, "rouge1_precision_stderr": 0.001484865853449878, "rouge1_recall": 0.21862820911914055, "rouge1_recall_stderr": 0.0033350892165413724, "rouge2_fmeasure": 0.015956828653791853, "rouge2_fmeasure_stderr": 0.0008556268371531968, "rouge2_precision": 0.011323100811233888, "rouge2_precision_stderr": 0.0006143156724747702, "rouge2_recall": 0.028138369341575313, "rouge2_recall_stderr": 0.0014918182252256981, "rougeL_fmeasure": 0.10721848422534487, "rougeL_fmeasure_stderr": 0.0016173733464716808, "rougeL_precision": 0.07615953328644745, "rougeL_precision_stderr": 0.0012055159065040108, "rougeL_recall": 0.189233538409857, "rougeL_recall_stderr": 0.002774433671102753, "rougeLsum_fmeasure": 
0.09633539960893688, "rougeLsum_fmeasure_stderr": 0.0015434800775443016, "rougeLsum_precision": 0.06837611993312843, "rougeLsum_precision_stderr": 0.0011392914654355624, "rougeLsum_recall": 0.17039042638815952, "rougeLsum_recall_stderr": 0.002688202908025971}}, "2": {"article_DOC_summary": {"bleu": 0.615246044289447, "bleu_stderr": 0.07561222529856154, "rouge1_fmeasure": 0.12031552187548465, "rouge1_fmeasure_stderr": 0.0019401023596279901, "rouge1_precision": 0.08540093432806928, "rouge1_precision_stderr": 0.0014365714497008954, "rouge1_recall": 0.21231148858831494, "rouge1_recall_stderr": 0.003293271964879966, "rouge2_fmeasure": 0.01630470699791051, "rouge2_fmeasure_stderr": 0.000846390835932116, "rouge2_precision": 0.011502007349166413, "rouge2_precision_stderr": 0.000604120354786913, "rouge2_recall": 0.029234857968137292, "rouge2_recall_stderr": 0.0015211619077998635, "rougeL_fmeasure": 0.10569353247950078, "rougeL_fmeasure_stderr": 0.0016112264732663693, "rougeL_precision": 0.07491986234635176, "rougeL_precision_stderr": 0.0011927622285722677, "rougeL_recall": 0.18731419594542628, "rougeL_recall_stderr": 0.002781788617302899, "rougeLsum_fmeasure": 0.0938181520328098, "rougeLsum_fmeasure_stderr": 0.0015105722754868353, "rougeLsum_precision": 0.06642701438881495, "rougeLsum_precision_stderr": 0.0011073815436868529, "rougeLsum_recall": 0.1667781953902699, "rougeLsum_recall_stderr": 0.0026526498512359333}}, "3": {"article_DOC_summary": {"bleu": 0.5363135385730404, "bleu_stderr": 0.03707981819107192, "rouge1_fmeasure": 0.11681165330668473, "rouge1_fmeasure_stderr": 0.00203678054821043, "rouge1_precision": 0.08494601064475496, "rouge1_precision_stderr": 0.0016533379054312243, "rouge1_recall": 0.20388079080231894, "rouge1_recall_stderr": 0.003487003634112906, "rouge2_fmeasure": 0.016120835877292777, "rouge2_fmeasure_stderr": 0.0008399416007048803, "rouge2_precision": 0.011605657050319455, "rouge2_precision_stderr": 0.0006267783397381562, "rouge2_recall": 
0.02868802660607133, "rouge2_recall_stderr": 0.001500648134927792, "rougeL_fmeasure": 0.10324876660992884, "rougeL_fmeasure_stderr": 0.0017215298385734191, "rougeL_precision": 0.07512470071697985, "rougeL_precision_stderr": 0.0014388196947271717, "rougeL_recall": 0.18066276970166414, "rougeL_recall_stderr": 0.002953620796499381, "rougeLsum_fmeasure": 0.09173565213355828, "rougeLsum_fmeasure_stderr": 0.00156573820081352, "rougeLsum_precision": 0.06684741877574751, "rougeLsum_precision_stderr": 0.001337910085082421, "rougeLsum_recall": 0.16093917951269368, "rougeLsum_recall_stderr": 0.0027217417123091563}}, "4": {"article_DOC_summary": {"bleu": 0.36380702453237423, "bleu_stderr": 0.12315867751461915, "rouge1_fmeasure": 0.03321457300244443, "rouge1_fmeasure_stderr": 0.0018949279717989612, "rouge1_precision": 0.029193163671091177, "rouge1_precision_stderr": 0.0019225623982780473, "rouge1_recall": 0.05165986432248715, "rouge1_recall_stderr": 0.0029726627100479727, "rouge2_fmeasure": 0.00491498426582168, "rouge2_fmeasure_stderr": 0.0005142132536709081, "rouge2_precision": 0.0038735350521400523, "rouge2_precision_stderr": 0.000446624939881871, "rouge2_recall": 0.008240428320551699, "rouge2_recall_stderr": 0.0008999130022158124, "rougeL_fmeasure": 0.02937853466978305, "rougeL_fmeasure_stderr": 0.0016519082698269935, "rougeL_precision": 0.02589016692167336, "rougeL_precision_stderr": 0.0017307301951775165, "rougeL_recall": 0.0461329470241897, "rougeL_recall_stderr": 0.0026433535996646575, "rougeLsum_fmeasure": 0.026657181188817783, "rougeLsum_fmeasure_stderr": 0.001539518289253859, "rougeLsum_precision": 0.024004200518679002, "rougeLsum_precision_stderr": 0.0016916307100890552, "rougeLsum_recall": 0.041413519810500114, "rougeLsum_recall_stderr": 0.0024218231110929894}}, "5": {"article_DOC_summary": {"bleu": 4.115418103698432e-38, "bleu_stderr": 6.296520100128421e-34, "rouge1_fmeasure": 0.00277644807976023, "rouge1_fmeasure_stderr": 0.0007325746737680531, "rouge1_precision": 
0.003092116221168736, "rouge1_precision_stderr": 0.0008391229048273843, "rouge1_recall": 0.002619968676809126, "rouge1_recall_stderr": 0.0006825595822719792, "rouge2_fmeasure": 0.0004684279631865969, "rouge2_fmeasure_stderr": 0.00017706692107499406, "rouge2_precision": 0.0005150702362134104, "rouge2_precision_stderr": 0.00019157235260488142, "rouge2_recall": 0.0004353324872192797, "rouge2_recall_stderr": 0.0001685056136899175, "rougeL_fmeasure": 0.0020346467233967516, "rougeL_fmeasure_stderr": 0.0005427546343352937, "rougeL_precision": 0.002222977376680864, "rougeL_precision_stderr": 0.0006043308708610969, "rougeL_recall": 0.0019631674265652684, "rougeL_recall_stderr": 0.0005268242286389718, "rougeLsum_fmeasure": 0.0021622748247738274, "rougeLsum_fmeasure_stderr": 0.0005841682012876381, "rougeLsum_precision": 0.0023634085002301602, "rougeLsum_precision_stderr": 0.0006503711974140844, "rougeLsum_recall": 0.0020809098481805793, "rougeLsum_recall_stderr": 0.0005629942602894516}}}} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_0.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_0.json new file mode 100644 index 0000000000000000000000000000000000000000..d0d728d59fb308b8e6af896e85027d09fc9126cc --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.2514375429625041, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.021998769689162215 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.05458209453689849, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0013170294944666171 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 
0.2336973485576593, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0040096086479517205 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.08324727569094531, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016897087122561929 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.02537832167404535, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007376731654434439 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.11385934611253304, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0027982806538345414 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03937584218396654, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0010694968357709916 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.053290497762677254, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012667710338326798 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.23006581827203837, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003953117095365217 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.08142923304713769, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0016238266133227941 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.05232107137991547, + "dataset_path": "GEM/web_nlg", + "dataset_name": 
"en", + "subset": null, + "rougeLsum_precision_stderr": 0.0012444155901422537 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.22537101759593997, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0037776155749755847 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07983781819186882, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015754149459580782 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_1.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_1.json new file mode 100644 index 0000000000000000000000000000000000000000..e279e8ff96434b5b0d16ac20a0e40871de1c2533 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.21769111537226385, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.03137697858822512 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.052001919875535305, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001321082636852792 + }, + { + "task_name": "GEM/web_nlg_en", + 
"prompt_name": "PALM_prompt", + "rouge1_recall": 0.23988446859803386, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.004156869077188399 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07951030843670538, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001723017279624104 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.023111231906370898, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0007776060967722213 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.10329178271134923, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002685870512933402 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03540060823925364, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0010747042112953737 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.050152137757026496, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0012390593186375923 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.23295789943347153, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003998023116442341 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07686062327992164, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0016204699207326648 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.049657111072679484, + 
"dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.001251376403565292 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.22830496660590702, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003848392166869495 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07589271888993057, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0016212233683597064 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_2.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_2.json new file mode 100644 index 0000000000000000000000000000000000000000..26e91bc59910465d2aa70219e343dda0565cb3c4 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.13233131077367935, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.020754119796718328 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.04657043035383532, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0011239018871889047 
+ }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.22310195073143146, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0036710595238463504 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.072229178898602, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001543092956764265 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.019500049975035447, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.000655019928228548 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.08944246985230496, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0024649243606626467 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03014855551830984, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009380925507866237 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.045134841152731975, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010504863598865273 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.2167111755014583, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0035124177345890236 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07016923751429241, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001463021965933563 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + 
"rougeLsum_precision": 0.044868836291338345, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0010785128355174395 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.2140025608028596, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0035049787217478366 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.06958672859211196, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014866571421992408 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_3.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_3.json new file mode 100644 index 0000000000000000000000000000000000000000..5e68f7a3b823a4ecebee3b85e3ca7f879aa331ba --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.13947101812447582, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.018315087240793882 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.04628425620736115, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + 
"rouge1_precision_stderr": 0.0011906227868207762 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.22162628233257115, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.003699603412762138 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07142372334512734, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014967716009997196 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.01900801974071657, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006225076787649264 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.09082883316536437, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0024924784743755716 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.029728278072032675, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0009115508980129494 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.0449857247777386, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011445457261440822 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.2157765528802989, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003568608064613849 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.06951257011480294, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0014353167256811218 + }, + { + "task_name": 
"GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.044538020363677544, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.001153245572021182 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.21261549130389004, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.003555356149389382 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.06870694683333799, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014430132276342272 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_4.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_4.json new file mode 100644 index 0000000000000000000000000000000000000000..272fb2cabba6a8532715f14f4bf0e5c0dd2140a5 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.1527953877181468, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.019768736235517084 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_precision": 0.049405661249387195, + "dataset_path": 
"GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015067654970712654 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.21945812028893277, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0038030401302535473 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.07322467651453785, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015776011765047185 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.019507736069696385, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006635980926413334 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.0898799136549315, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0025235606959418174 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.030133163651116946, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000946448138374341 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.04748079297445503, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0014455281648470706 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21167906766927475, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.003606124529568187 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07052992790200849, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + 
"rougeL_fmeasure_stderr": 0.0014961569169770967 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.047420187913872876, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.00146239713460076 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.21044965879733385, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0036184924163132487 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07026457828396626, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015085357222444655 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_5.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_5.json new file mode 100644 index 0000000000000000000000000000000000000000..0ee72d08878dd4ab4f99e3a6f4930cd20dc46592 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-web_nlg_en_PALM_prompt_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "bleu": 0.1543511832741751, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.016910234829161125 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": 
"PALM_prompt", + "rouge1_precision": 0.04897180945064415, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0011879866388460427 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_recall": 0.22641617291173682, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0038708800569893387 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge1_fmeasure": 0.0752860133282323, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016079889297565108 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_precision": 0.01979288435902756, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006930205249820375 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_recall": 0.0924249139436825, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.002601472833120827 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rouge2_fmeasure": 0.03067588405791704, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.000990869627019482 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_precision": 0.04693858845286846, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0011062268749065108 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_recall": 0.21842890172009044, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0037028076196281346 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeL_fmeasure": 0.07241506380370862, + "dataset_path": 
"GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0015195422798472567 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_precision": 0.04684600487139167, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0011143004580429542 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_recall": 0.21758509824855146, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0037100549914324075 + }, + { + "task_name": "GEM/web_nlg_en", + "prompt_name": "PALM_prompt", + "rougeLsum_fmeasure": 0.07218362769944814, + "dataset_path": "GEM/web_nlg", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.00152233932545025 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_0.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_0.json new file mode 100644 index 0000000000000000000000000000000000000000..60e6605e01ba35597855adcd1bb67c10edae0331 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.08339163245487013, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 
0.0015701734542072692 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.1339614422246552, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0022063646191760266 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.09500586912764274, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.001581030847773693 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.00971079106803878, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.00043115845148404607 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.016507607826228517, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0008300127388871056 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.01120473259619999, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0004805228056876556 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.0740161323947552, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0013030587331252655 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.12102487819642505, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0019286216477420524 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08483002284596712, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0013213133984729892 + }, + { + "task_name": 
"GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.07840425802379598, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014506133818074898 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.1266093161972992, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.002050758180318845 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.08946714159842706, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014568402519778134 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.5006634552225351, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.044910978275131574 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_1.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_1.json new file mode 100644 index 0000000000000000000000000000000000000000..2fda386a14d7702553e77531a0e7b54973155586 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.11154914738842088, + 
"dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.001565926577691468 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.17837787236817565, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0022939374135331763 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.12650922949233337, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0015776384826867381 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.012303282741217402, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0004977312922919948 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.02120986919186197, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.000961024479567027 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.014185318776824654, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005539952463359423 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.0819807938213227, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010365171738526949 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.1347716870219141, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0016852072346891771 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.093445358194514, + "dataset_path": "GEM/wiki_lingua", + 
"dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0010265579811074785 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.10520376697434682, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014560555302690096 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.16883413702342775, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0021514249258459766 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.11937816894449679, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0014610839777197875 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.7308707235181653, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.03624491848839971 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_2.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_2.json new file mode 100644 index 0000000000000000000000000000000000000000..4cf296f095df64d2a2632a06421b8459f2ada1a4 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + 
"task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.10718496730177166, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0015158574960997943 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.15820343048358307, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0021422121164417526 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.11493621326625231, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0014165434365593652 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.009485434740860894, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0004499838163545394 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.016060981276017856, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0008780026374129505 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.010491267852697777, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00045750208006263256 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.0828490160772031, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0010992324075365162 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.1250415287204922, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0016858824852820932 + }, + { + "task_name": "GEM/wiki_lingua_en", + 
"prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08905184570291635, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0010026383889898429 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.10147571850767263, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0014253846921573384 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.1496723021555078, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.001999788388601054 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.10867696221681347, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0013166067421143065 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.574275497631973, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.03208608000882317 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_3.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_3.json new file mode 100644 index 0000000000000000000000000000000000000000..1dfbdf4c4dbe1d7396729d2e4b8978bed6970779 --- /dev/null +++ 
b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.10117615812697052, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.002001765686505826 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.13395309268486705, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0024326536614082067 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.09863778696344866, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0016224746347034249 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.012462726953274111, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006845656283001023 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.01804598548128886, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0009715950225847244 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.011959535486630863, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0005266925357857687 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.08228663390439268, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.0016068246788563708 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.11039159364883624, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": 
"en", + "subset": null, + "rougeL_recall_stderr": 0.002002607001174257 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.08014064782546042, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.001255446986058769 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.09508913606615355, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0018770902467542421 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.12595075159640753, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.002280111696329522 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.09253590049962912, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0015061332916376507 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.8154178741377799, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.0711428685141705 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_4.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_4.json new file mode 100644 index 
0000000000000000000000000000000000000000..c590eb27adc9336bab571db31f11b219567f0be6 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.03898357731611677, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0017825874804167753 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.04532969573622113, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0018850566541848744 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.033416501266504285, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0012811365874590627 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.00575301955053695, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0006865146047287286 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.006448105016916232, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.0006394944930278906 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.004346926730043043, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.0003414378126847235 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.033141939533690275, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_precision_stderr": 0.001553685707066213 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": 
"tldr_en", + "rougeL_recall": 0.03825941205326226, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0015857656133650632 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.028002192357454752, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0010559374376441348 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.0365582282355839, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0016924791527445587 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.0421362580870465, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0017481195713651538 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.03105060959588777, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0011822792638006734 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 0.1304924586039303, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 0.015544038110929517 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_5.json 
b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_5.json new file mode 100644 index 0000000000000000000000000000000000000000..34c70752994e9871ebf86fc66baee88d90574c3a --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_GEM-wiki_lingua_en_tldr_en_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_precision": 0.006680699820051665, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_precision_stderr": 0.0007831449627821159 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_recall": 0.008032946692221482, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_recall_stderr": 0.0009295621673176173 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge1_fmeasure": 0.005735850006894939, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge1_fmeasure_stderr": 0.0006087235917222238 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_precision": 0.0009402204716120495, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_precision_stderr": 0.0002685696838656648 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_recall": 0.0014701868415047995, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_recall_stderr": 0.00040920046688979096 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rouge2_fmeasure": 0.0008163688826715328, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rouge2_fmeasure_stderr": 0.00016062789042140316 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_precision": 0.005734541876408672, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": 
null, + "rougeL_precision_stderr": 0.000687876116925154 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_recall": 0.006799717290334942, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_recall_stderr": 0.0008033313544632205 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeL_fmeasure": 0.004789842927248061, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeL_fmeasure_stderr": 0.0004998401506731663 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_precision": 0.006248631750092553, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_precision_stderr": 0.0007392179272479729 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_recall": 0.0075027920259527195, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_recall_stderr": 0.0008756411251168826 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "rougeLsum_fmeasure": 0.005327313824531753, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0005639936965640789 + }, + { + "task_name": "GEM/wiki_lingua_en", + "prompt_name": "tldr_en", + "bleu": 1.7187355603562817e-08, + "dataset_path": "GEM/wiki_lingua", + "dataset_name": "en", + "subset": null, + "bleu_stderr": 5.0484493431935596e-08 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ 
No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_0.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_0.json new file mode 100644 index 0000000000000000000000000000000000000000..1dd9e499d373038945c2ffdb1b77b14b5eb02b2e --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 0.01680243748394886, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.002698090075425261 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.6433261276659951, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0029218316233115605 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.08476188184739113, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0006754266013801586 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.14662019660341005, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0009853032775837527 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.46212847085152825, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.003642947152471072 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.04262636012387003, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 
0.0004484611360318446 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.07673989711292045, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0007457397869014458 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.6426733498882173, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0029154716414218082 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.08466498168636054, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0006738662721002983 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.14645605673737266, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0009826864611526293 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.6423963007669804, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0029201140300377524 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.08440337916364413, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0006553411263838314 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.14626033409401368, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0009813673062461156 + } + ], + "config": { + "model": "hf-causal", + "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_1.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_1.json new file mode 100644 index 0000000000000000000000000000000000000000..0c0f1e41f0f993fad9e7bd67c57b5108ed593f23 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 3.606807825191092, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.09591000011528884 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.33812705186617115, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0030059547853716175 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.2656622288074228, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0023779546588631423 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.2785732199964262, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0021267678733130654 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + 
"rouge2_precision": 0.09524146435617294, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0024246810891708813 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.06840391458761358, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.001331519655928045 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.07187122797474006, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0013330899113155933 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.2510116567489667, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0026151315715530056 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.19361887293082036, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0018123219187092408 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.20361863606382402, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.001641365236629229 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.28122013134381013, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0028178130895461914 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.2186184944989252, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.002073233154182344 + }, + { + 
"task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.2296849019393887, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0018950866763813939 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_2.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_2.json new file mode 100644 index 0000000000000000000000000000000000000000..b8c44a0f9e0ccd81c4aad3d59ff17de931ec913b --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 3.992792767047178, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.09725526110232631 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.33608162616426657, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.0031714291275481456 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.27361700767297814, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.002511311849708488 + }, + { + "task_name": 
"e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.2819453651202342, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0022491484131099567 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.10251377541302573, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.002529512493704511 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.07550238911894418, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0013919432182609326 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.07804530186588765, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0013694663864768511 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.2493183914789315, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0027338010573676377 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.1989015651153766, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.00188976140279768 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.20544412900207384, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0016999081653822567 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.27995922095515885, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + 
"rougeLsum_precision_stderr": 0.0029605640783204592 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.22532233392524176, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.00219041995304143 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.23257355340408511, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0019945764398285565 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_3.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_3.json new file mode 100644 index 0000000000000000000000000000000000000000..adb38770ea1a00caf8e84aaefdb722ca3c25fa4d --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 4.542589846928786, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.11022442742688977 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.3532606000909218, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + 
"rouge1_precision_stderr": 0.003394671211548118 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.2790053105425374, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0025333819014360984 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.2898476701675653, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.0022762938721868015 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.11778544433669538, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.002806905841317741 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.08272326311443293, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0015245876152504886 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.08636994486944548, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0014768318615889233 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.26309518558047507, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.003000419731819453 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.20306642059186092, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.0019374318026212406 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.21152763709695743, + "dataset_path": 
"e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0017565192258644194 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.29542533245841884, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.003198053867673902 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.2301634245125563, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0022249235590292974 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.23961551003163925, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002030612765036443 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_4.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_4.json new file mode 100644 index 0000000000000000000000000000000000000000..0fe3b30471f73a4e7b742b0f5c8ae1ba67c33218 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 5.114285558003046, + 
"dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.20109382641670068 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.3733235088964239, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003609553087130593 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.2877821560581824, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0026362691288050522 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.29986918539212454, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002342588306645915 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.13637647673895614, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.003086580748456374 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.09285242050197177, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016087031680355136 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.09735409373330947, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 0.0015722322876525361 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.28242300696080636, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0032929610979490643 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": 
"generate_text_restaurant", + "rougeL_recall": 0.21130880883099537, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002044919631479113 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.22122022721608414, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018554112374430712 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3151065011267256, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0034586989073176035 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.23813807804934384, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.00230934645012491 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.24924810659702057, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.002104346485260036 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_5.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_5.json new file mode 100644 index 
0000000000000000000000000000000000000000..902014b5035df54949373a40b33cbef51510aafa --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_e2e_nlg_cleaned_generate_text_restaurant_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "bleu": 5.632987033503432, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "bleu_stderr": 0.12270638039694408 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_precision": 0.39620617849070117, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_precision_stderr": 0.003692529570425549 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_recall": 0.2988258431534535, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_recall_stderr": 0.0026154059096158704 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge1_fmeasure": 0.313198984692364, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge1_fmeasure_stderr": 0.002298889735236565 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_precision": 0.15550185624641005, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_precision_stderr": 0.0033115072925049963 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_recall": 0.10296881934063905, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_recall_stderr": 0.0016637455699906201 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rouge2_fmeasure": 0.10879095982587014, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rouge2_fmeasure_stderr": 
0.001637357575795527 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_precision": 0.30318450489358567, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_precision_stderr": 0.0034204881401338435 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_recall": 0.2224452039754921, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_recall_stderr": 0.002075351954997885 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeL_fmeasure": 0.23395898887358021, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeL_fmeasure_stderr": 0.0018573982535088618 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_precision": 0.3365106811613415, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_precision_stderr": 0.0035625351600787124 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_recall": 0.249899897084878, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_recall_stderr": 0.0023428692540716743 + }, + { + "task_name": "e2e_nlg_cleaned", + "prompt_name": "generate_text_restaurant", + "rougeLsum_fmeasure": 0.2625181349452857, + "dataset_path": "e2e_nlg_cleaned", + "dataset_name": null, + "subset": null, + "rougeLsum_fmeasure_stderr": 0.0021061093071915913 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + 
"limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_0.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_0.json new file mode 100644 index 0000000000000000000000000000000000000000..7e0a2b56b0bc84ed3e5007c5c4ceb11d80c06160 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_0.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.09771365246979172, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0016248674407969557 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.23650282724178925, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003565646784197251 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.1352322775766746, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.002093147476186513 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.01591614621801506, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0007733721524986365 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.039557433327920236, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0018240383271500894 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.022118301154473812, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0010229294547598842 + }, + { + "task_name": "gem_xsum", + "prompt_name": 
"article_DOC_summary", + "rougeL_precision": 0.08548043307254967, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0013203675221128862 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.20805176836389447, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002981835490749998 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.11848768408472485, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017014410425288265 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.0757578176731396, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0012941776669993434 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.18487501359322192, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002939294199863662 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.10492817925826417, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0016648037816994427 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.7983779878069583, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.08407645621525846 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 0, + "batch_size": 16, + "device": "cuda", + "use_cache": 
false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_1.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_1.json new file mode 100644 index 0000000000000000000000000000000000000000..b912026ebd051581dcfe77da32b9cf163732e658 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_1.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08873212210300226, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.001484865853449878 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.21862820911914055, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0033350892165413724 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.12464195903723292, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.001986282187084721 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.011323100811233888, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006143156724747702 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.028138369341575313, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0014918182252256981 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.015956828653791853, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0008556268371531968 + }, + { + "task_name": "gem_xsum", + 
"prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07615953328644745, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0012055159065040108 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.189233538409857, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002774433671102753 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.10721848422534487, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0016173733464716808 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06837611993312843, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0011392914654355624 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.17039042638815952, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.002688202908025971 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09633539960893688, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015434800775443016 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.5795358900599222, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.08200609097903194 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 1, + "batch_size": 16, + "device": "cuda", + 
"use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_2.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_2.json new file mode 100644 index 0000000000000000000000000000000000000000..f8db531bdaef986accc8134dcd08ef3361b8d4a5 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_2.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08540093432806928, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0014365714497008954 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.21231148858831494, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003293271964879966 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.12031552187548465, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0019401023596279901 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.011502007349166413, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.000604120354786913 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.029234857968137292, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0015211619077998635 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.01630470699791051, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.000846390835932116 + }, + { + "task_name": 
"gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07491986234635176, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0011927622285722677 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.18731419594542628, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002781788617302899 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.10569353247950078, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0016112264732663693 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06642701438881495, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0011073815436868529 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.1667781953902699, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0026526498512359333 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.0938181520328098, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0015105722754868353 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.615246044289447, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.07561222529856154 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 2, + "batch_size": 16, + 
"device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_3.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_3.json new file mode 100644 index 0000000000000000000000000000000000000000..115c08d6346e32cbbc85b2daaafedef9211486e6 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_3.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.08494601064475496, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0016533379054312243 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.20388079080231894, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.003487003634112906 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.11681165330668473, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.00203678054821043 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.011605657050319455, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.0006267783397381562 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.02868802660607133, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.001500648134927792 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.016120835877292777, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 0.0008399416007048803 + }, + { + 
"task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.07512470071697985, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0014388196947271717 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.18066276970166414, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.002953620796499381 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.10324876660992884, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0017215298385734191 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.06684741877574751, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.001337910085082421 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.16093917951269368, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0027217417123091563 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.09173565213355828, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.00156573820081352 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.5363135385730404, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.03707981819107192 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 3, + 
"batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_4.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_4.json new file mode 100644 index 0000000000000000000000000000000000000000..de22093799bb53b697795bd2fb48e280ee467578 --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_4.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.029193163671091177, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0019225623982780473 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.05165986432248715, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0029726627100479727 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.03321457300244443, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0018949279717989612 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.0038735350521400523, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.000446624939881871 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.008240428320551699, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0008999130022158124 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.00491498426582168, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_fmeasure_stderr": 
0.0005142132536709081 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.02589016692167336, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0017307301951775165 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.0461329470241897, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0026433535996646575 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.02937853466978305, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0016519082698269935 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.024004200518679002, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0016916307100890552 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.041413519810500114, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0024218231110929894 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.026657181188817783, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.001539518289253859 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 0.36380702453237423, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 0.12315867751461915 + } + ], + "config": { + "model": "hf-causal", + "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": 
"", + "num_fewshot": 4, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_5.json b/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_5.json new file mode 100644 index 0000000000000000000000000000000000000000..06f7b53b09fdee58dde4427edb4ad81dd02df8aa --- /dev/null +++ b/421m3b93b9/evaluation/generation/slim.421m3b93b9_gem_xsum_article_DOC_summary_5.json @@ -0,0 +1,133 @@ +{ + "results": [ + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_precision": 0.003092116221168736, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_precision_stderr": 0.0008391229048273843 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_recall": 0.002619968676809126, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_recall_stderr": 0.0006825595822719792 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge1_fmeasure": 0.00277644807976023, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge1_fmeasure_stderr": 0.0007325746737680531 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_precision": 0.0005150702362134104, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_precision_stderr": 0.00019157235260488142 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_recall": 0.0004353324872192797, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rouge2_recall_stderr": 0.0001685056136899175 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rouge2_fmeasure": 0.0004684279631865969, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + 
"rouge2_fmeasure_stderr": 0.00017706692107499406 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_precision": 0.002222977376680864, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_precision_stderr": 0.0006043308708610969 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_recall": 0.0019631674265652684, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_recall_stderr": 0.0005268242286389718 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeL_fmeasure": 0.0020346467233967516, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeL_fmeasure_stderr": 0.0005427546343352937 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_precision": 0.0023634085002301602, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_precision_stderr": 0.0006503711974140844 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_recall": 0.0020809098481805793, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_recall_stderr": 0.0005629942602894516 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "rougeLsum_fmeasure": 0.0021622748247738274, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "rougeLsum_fmeasure_stderr": 0.0005841682012876381 + }, + { + "task_name": "gem_xsum", + "prompt_name": "article_DOC_summary", + "bleu": 4.115418103698432e-38, + "dataset_path": "GEM/xsum", + "dataset_name": null, + "subset": "", + "bleu_stderr": 6.296520100128421e-34 + } + ], + "config": { + "model": "hf-causal", + "model_args": 
"pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-421m-3b9/421m3b93b9/transformers,use_accelerate=True,tokenizer=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/gpt2,dtype=bfloat16", + "task_args": "", + "num_fewshot": 5, + "batch_size": 16, + "device": "cuda", + "use_cache": false, + "limit": 3000, + "bootstrap_iters": 10, + "seed": 1234 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/rankeval/421m3b93b9_0.csv b/421m3b93b9/evaluation/rankeval/421m3b93b9_0.csv new file mode 100644 index 0000000000000000000000000000000000000000..4814c36e7a0284ef468bbfa76656429f4a4a626e --- /dev/null +++ b/421m3b93b9/evaluation/rankeval/421m3b93b9_0.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.356,0.015149042659306625,0 +anli_r2,acc,0.343,0.015019206922356953,0 +anli_r3,acc,0.34,0.013680495725767789,0 +arc_challenge,acc,0.17918088737201365,0.01120704521661567,0 +arc_challenge,acc_norm,0.2295221843003413,0.012288926760890795,0 +arc_easy,acc,0.41035353535353536,0.01009353125576546,0 +arc_easy,acc_norm,0.37247474747474746,0.009920469215736027,0 +boolq,acc,0.6207951070336392,0.008486012137246294,1 +cb,acc,0.35714285714285715,0.06460957383809221,1 +cb,f1,0.17777777777777778,,1 +copa,acc,0.6,0.04923659639173309,0 +hellaswag,acc,0.28121888070105555,0.004486752200430366,0 +hellaswag,acc_norm,0.295857398924517,0.0045549440206205,0 +piqa,acc,0.6425462459194777,0.011181692590867657,0 +piqa,acc_norm,0.6343852013057671,0.011236571679006277,0 +rte,acc,0.5306859205776173,0.03003973059219781,0 +sciq,acc,0.677,0.014794927843348635,0 +sciq,acc_norm,0.593,0.01554324910025554,0 +storycloze_2016,acc,0.5681453768038482,0.011454541812712438,0 +winogrande,acc,0.49013417521704816,0.014049749833367592,0 diff --git a/421m3b93b9/evaluation/rankeval/421m3b93b9_0.json b/421m3b93b9/evaluation/rankeval/421m3b93b9_0.json new file mode 100644 index 
0000000000000000000000000000000000000000..668ffb125a24f326f5db61ee1b4fb62f9e91c83f --- /dev/null +++ b/421m3b93b9/evaluation/rankeval/421m3b93b9_0.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.356, + "acc_stderr": 0.015149042659306625 + }, + "anli_r2": { + "acc": 0.343, + "acc_stderr": 0.015019206922356953 + }, + "anli_r3": { + "acc": 0.34, + "acc_stderr": 0.013680495725767789 + }, + "cb": { + "acc": 0.35714285714285715, + "acc_stderr": 0.06460957383809221, + "f1": 0.17777777777777778 + }, + "copa": { + "acc": 0.6, + "acc_stderr": 0.04923659639173309 + }, + "hellaswag": { + "acc": 0.28121888070105555, + "acc_stderr": 0.004486752200430366, + "acc_norm": 0.295857398924517, + "acc_norm_stderr": 0.0045549440206205 + }, + "rte": { + "acc": 0.5306859205776173, + "acc_stderr": 0.03003973059219781 + }, + "winogrande": { + "acc": 0.49013417521704816, + "acc_stderr": 0.014049749833367592 + }, + "storycloze_2016": { + "acc": 0.5681453768038482, + "acc_stderr": 0.011454541812712438 + }, + "boolq": { + "acc": 0.6207951070336392, + "acc_stderr": 0.008486012137246294 + }, + "arc_easy": { + "acc": 0.41035353535353536, + "acc_stderr": 0.01009353125576546, + "acc_norm": 0.37247474747474746, + "acc_norm_stderr": 0.009920469215736027 + }, + "arc_challenge": { + "acc": 0.17918088737201365, + "acc_stderr": 0.01120704521661567, + "acc_norm": 0.2295221843003413, + "acc_norm_stderr": 0.012288926760890795 + }, + "sciq": { + "acc": 0.677, + "acc_stderr": 0.014794927843348635, + "acc_norm": 0.593, + "acc_norm_stderr": 0.01554324910025554 + }, + "piqa": { + "acc": 0.6425462459194777, + "acc_stderr": 0.011181692590867657, + "acc_norm": 0.6343852013057671, + "acc_norm_stderr": 0.011236571679006277 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file 
diff --git a/421m3b93b9/evaluation/rankeval/421m3b93b9_1.csv b/421m3b93b9/evaluation/rankeval/421m3b93b9_1.csv new file mode 100644 index 0000000000000000000000000000000000000000..ae6fb1698f33c11e88c5790cfbea1ab3527f6a8a --- /dev/null +++ b/421m3b93b9/evaluation/rankeval/421m3b93b9_1.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.334,0.014922019523732961,0 +anli_r2,acc,0.329,0.014865395385928364,0 +anli_r3,acc,0.3283333333333333,0.013562032919529022,0 +arc_challenge,acc,0.17406143344709898,0.01108017712948222,0 +arc_challenge,acc_norm,0.22098976109215018,0.012124929206818258,0 +arc_easy,acc,0.3838383838383838,0.009979061846649307,0 +arc_easy,acc_norm,0.3653198653198653,0.009880576614806928,0 +boolq,acc,0.5700305810397553,0.00865885369072926,1 +cb,acc,0.4107142857142857,0.0663363415035954,1 +cb,f1,0.2859195402298851,,1 +copa,acc,0.58,0.049604496374885836,0 +hellaswag,acc,0.28251344353714397,0.004493015945599715,0 +hellaswag,acc_norm,0.29356701852220674,0.004544651976040094,0 +piqa,acc,0.6349292709466812,0.01123302183055483,0 +piqa,acc_norm,0.6316648531011969,0.011254089354334362,0 +rte,acc,0.5054151624548736,0.030094698123239966,0 +sciq,acc,0.663,0.014955087918653605,0 +sciq,acc_norm,0.593,0.015543249100255542,0 +storycloze_2016,acc,0.5547835382148584,0.011492819519292362,0 +winogrande,acc,0.516179952644041,0.0140451261309786,0 diff --git a/421m3b93b9/evaluation/rankeval/421m3b93b9_1.json b/421m3b93b9/evaluation/rankeval/421m3b93b9_1.json new file mode 100644 index 0000000000000000000000000000000000000000..de2ecb426775dd169aefab1d14d5d71d2eb4fe80 --- /dev/null +++ b/421m3b93b9/evaluation/rankeval/421m3b93b9_1.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.334, + "acc_stderr": 0.014922019523732961 + }, + "anli_r2": { + "acc": 0.329, + "acc_stderr": 0.014865395385928364 + }, + "anli_r3": { + "acc": 0.3283333333333333, + "acc_stderr": 0.013562032919529022 + }, + "cb": { + "acc": 0.4107142857142857, + "acc_stderr": 
0.0663363415035954, + "f1": 0.2859195402298851 + }, + "copa": { + "acc": 0.58, + "acc_stderr": 0.049604496374885836 + }, + "hellaswag": { + "acc": 0.28251344353714397, + "acc_stderr": 0.004493015945599715, + "acc_norm": 0.29356701852220674, + "acc_norm_stderr": 0.004544651976040094 + }, + "rte": { + "acc": 0.5054151624548736, + "acc_stderr": 0.030094698123239966 + }, + "winogrande": { + "acc": 0.516179952644041, + "acc_stderr": 0.0140451261309786 + }, + "storycloze_2016": { + "acc": 0.5547835382148584, + "acc_stderr": 0.011492819519292362 + }, + "boolq": { + "acc": 0.5700305810397553, + "acc_stderr": 0.00865885369072926 + }, + "arc_easy": { + "acc": 0.3838383838383838, + "acc_stderr": 0.009979061846649307, + "acc_norm": 0.3653198653198653, + "acc_norm_stderr": 0.009880576614806928 + }, + "arc_challenge": { + "acc": 0.17406143344709898, + "acc_stderr": 0.01108017712948222, + "acc_norm": 0.22098976109215018, + "acc_norm_stderr": 0.012124929206818258 + }, + "sciq": { + "acc": 0.663, + "acc_stderr": 0.014955087918653605, + "acc_norm": 0.593, + "acc_norm_stderr": 0.015543249100255542 + }, + "piqa": { + "acc": 0.6349292709466812, + "acc_stderr": 0.01123302183055483, + "acc_norm": 0.6316648531011969, + "acc_norm_stderr": 0.011254089354334362 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/rankeval/421m3b93b9_2.csv b/421m3b93b9/evaluation/rankeval/421m3b93b9_2.csv new file mode 100644 index 0000000000000000000000000000000000000000..37c250e7e795a12c91977fcce963ae8efa8ef0f1 --- /dev/null +++ b/421m3b93b9/evaluation/rankeval/421m3b93b9_2.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.329,0.01486539538592836,0 +anli_r2,acc,0.342,0.015008706182121731,0 
+anli_r3,acc,0.3475,0.013751753243291852,0 +arc_challenge,acc,0.181740614334471,0.011269198948880236,0 +arc_challenge,acc_norm,0.23037542662116042,0.01230492841874761,0 +arc_easy,acc,0.3867845117845118,0.009993308355370977,0 +arc_easy,acc_norm,0.36784511784511786,0.009894923464455193,0 +boolq,acc,0.581651376146789,0.008627661390825414,1 +cb,acc,0.42857142857142855,0.06672848092813058,1 +cb,f1,0.2791044776119403,,1 +copa,acc,0.59,0.04943110704237102,0 +hellaswag,acc,0.27982473610834496,0.004479955169853626,0 +hellaswag,acc_norm,0.2980481975702051,0.004564659775075937,0 +piqa,acc,0.6360174102285092,0.011225875703487176,0 +piqa,acc_norm,0.6245919477693145,0.01129783958977666,0 +rte,acc,0.4693140794223827,0.03003973059219781,0 +sciq,acc,0.647,0.015120172605483694,0 +sciq,acc_norm,0.601,0.015493193313162906,0 +storycloze_2016,acc,0.5585248530197755,0.011482952758456903,0 +winogrande,acc,0.5232833464877664,0.014037241309573642,0 diff --git a/421m3b93b9/evaluation/rankeval/421m3b93b9_2.json b/421m3b93b9/evaluation/rankeval/421m3b93b9_2.json new file mode 100644 index 0000000000000000000000000000000000000000..8841f57588cbdc42033eabb77a4809566961f598 --- /dev/null +++ b/421m3b93b9/evaluation/rankeval/421m3b93b9_2.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.329, + "acc_stderr": 0.01486539538592836 + }, + "anli_r2": { + "acc": 0.342, + "acc_stderr": 0.015008706182121731 + }, + "anli_r3": { + "acc": 0.3475, + "acc_stderr": 0.013751753243291852 + }, + "cb": { + "acc": 0.42857142857142855, + "acc_stderr": 0.06672848092813058, + "f1": 0.2791044776119403 + }, + "copa": { + "acc": 0.59, + "acc_stderr": 0.04943110704237102 + }, + "hellaswag": { + "acc": 0.27982473610834496, + "acc_stderr": 0.004479955169853626, + "acc_norm": 0.2980481975702051, + "acc_norm_stderr": 0.004564659775075937 + }, + "rte": { + "acc": 0.4693140794223827, + "acc_stderr": 0.03003973059219781 + }, + "winogrande": { + "acc": 0.5232833464877664, + "acc_stderr": 0.014037241309573642 + }, + 
"storycloze_2016": { + "acc": 0.5585248530197755, + "acc_stderr": 0.011482952758456903 + }, + "boolq": { + "acc": 0.581651376146789, + "acc_stderr": 0.008627661390825414 + }, + "arc_easy": { + "acc": 0.3867845117845118, + "acc_stderr": 0.009993308355370977, + "acc_norm": 0.36784511784511786, + "acc_norm_stderr": 0.009894923464455193 + }, + "arc_challenge": { + "acc": 0.181740614334471, + "acc_stderr": 0.011269198948880236, + "acc_norm": 0.23037542662116042, + "acc_norm_stderr": 0.01230492841874761 + }, + "sciq": { + "acc": 0.647, + "acc_stderr": 0.015120172605483694, + "acc_norm": 0.601, + "acc_norm_stderr": 0.015493193313162906 + }, + "piqa": { + "acc": 0.6360174102285092, + "acc_stderr": 0.011225875703487176, + "acc_norm": 0.6245919477693145, + "acc_norm_stderr": 0.01129783958977666 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/rankeval/421m3b93b9_3.csv b/421m3b93b9/evaluation/rankeval/421m3b93b9_3.csv new file mode 100644 index 0000000000000000000000000000000000000000..178b8b965db5bc4e610e6c2b862e57a2df315364 --- /dev/null +++ b/421m3b93b9/evaluation/rankeval/421m3b93b9_3.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.318,0.014734079309311901,0 +anli_r2,acc,0.371,0.015283736211823188,0 +anli_r3,acc,0.33,0.013579531277800918,0 +arc_challenge,acc,0.17747440273037543,0.011165138769643944,0 +arc_challenge,acc_norm,0.22098976109215018,0.012124929206818258,0 +arc_easy,acc,0.39015151515151514,0.010009118166667405,0 +arc_easy,acc_norm,0.36742424242424243,0.009892552616211553,0 +boolq,acc,0.5914373088685015,0.008597580502718664,1 +cb,acc,0.39285714285714285,0.0658538889806635,1 +cb,f1,0.2673509286412512,,1 +copa,acc,0.61,0.04902071300001975,0 
+hellaswag,acc,0.28141804421429994,0.00448771884333028,0 +hellaswag,acc_norm,0.29645488946425014,0.004557606227194294,0 +piqa,acc,0.6381936887921654,0.011211397313020368,0 +piqa,acc_norm,0.6311207834602829,0.011257546676908807,0 +rte,acc,0.45126353790613716,0.029953149241808946,0 +sciq,acc,0.665,0.01493311749093257,0 +sciq,acc_norm,0.611,0.015424555647308488,0 +storycloze_2016,acc,0.5563869588455371,0.011488671725073457,0 +winogrande,acc,0.5185477505919495,0.014042813708888378,0 diff --git a/421m3b93b9/evaluation/rankeval/421m3b93b9_3.json b/421m3b93b9/evaluation/rankeval/421m3b93b9_3.json new file mode 100644 index 0000000000000000000000000000000000000000..2f5fbf6c3f225222e36aee0bdf79b8be528f5672 --- /dev/null +++ b/421m3b93b9/evaluation/rankeval/421m3b93b9_3.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.318, + "acc_stderr": 0.014734079309311901 + }, + "anli_r2": { + "acc": 0.371, + "acc_stderr": 0.015283736211823188 + }, + "anli_r3": { + "acc": 0.33, + "acc_stderr": 0.013579531277800918 + }, + "cb": { + "acc": 0.39285714285714285, + "acc_stderr": 0.0658538889806635, + "f1": 0.2673509286412512 + }, + "copa": { + "acc": 0.61, + "acc_stderr": 0.04902071300001975 + }, + "hellaswag": { + "acc": 0.28141804421429994, + "acc_stderr": 0.00448771884333028, + "acc_norm": 0.29645488946425014, + "acc_norm_stderr": 0.004557606227194294 + }, + "rte": { + "acc": 0.45126353790613716, + "acc_stderr": 0.029953149241808946 + }, + "winogrande": { + "acc": 0.5185477505919495, + "acc_stderr": 0.014042813708888378 + }, + "storycloze_2016": { + "acc": 0.5563869588455371, + "acc_stderr": 0.011488671725073457 + }, + "boolq": { + "acc": 0.5914373088685015, + "acc_stderr": 0.008597580502718664 + }, + "arc_easy": { + "acc": 0.39015151515151514, + "acc_stderr": 0.010009118166667405, + "acc_norm": 0.36742424242424243, + "acc_norm_stderr": 0.009892552616211553 + }, + "arc_challenge": { + "acc": 0.17747440273037543, + "acc_stderr": 0.011165138769643944, + "acc_norm": 
0.22098976109215018, + "acc_norm_stderr": 0.012124929206818258 + }, + "sciq": { + "acc": 0.665, + "acc_stderr": 0.01493311749093257, + "acc_norm": 0.611, + "acc_norm_stderr": 0.015424555647308488 + }, + "piqa": { + "acc": 0.6381936887921654, + "acc_stderr": 0.011211397313020368, + "acc_norm": 0.6311207834602829, + "acc_norm_stderr": 0.011257546676908807 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/rankeval/421m3b93b9_4.csv b/421m3b93b9/evaluation/rankeval/421m3b93b9_4.csv new file mode 100644 index 0000000000000000000000000000000000000000..3bd8387e8c209c25abc85ecf50f228b02f2ffd08 --- /dev/null +++ b/421m3b93b9/evaluation/rankeval/421m3b93b9_4.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.334,0.014922019523732968,0 +anli_r2,acc,0.336,0.014944140233795025,0 +anli_r3,acc,0.32916666666666666,0.013570806258433625,0 +arc_challenge,acc,0.18600682593856654,0.011370940183266742,0 +arc_challenge,acc_norm,0.2363481228668942,0.012414960524301818,0 +arc_easy,acc,0.38341750841750843,0.009976995068264723,0 +arc_easy,acc_norm,0.3611111111111111,0.009856013425811242,0 +boolq,acc,0.5935779816513761,0.00859053170888219,1 +cb,acc,0.44642857142857145,0.06703189227942398,1 +cb,f1,0.3057909604519774,,1 +copa,acc,0.61,0.04902071300001974,0 +hellaswag,acc,0.28141804421429994,0.004487718843330278,0 +hellaswag,acc_norm,0.2943636725751842,0.004548247487546321,0 +piqa,acc,0.6354733405875952,0.011229456510295966,0 +piqa,acc_norm,0.6262241566920566,0.011287972563201017,0 +rte,acc,0.45126353790613716,0.029953149241808946,0 +sciq,acc,0.672,0.014853842487270333,0 +sciq,acc_norm,0.621,0.01534909100222535,0 +storycloze_2016,acc,0.5569214323890967,0.011487262197727749,0 
+winogrande,acc,0.5090765588003157,0.0140501700944977,0 diff --git a/421m3b93b9/evaluation/rankeval/421m3b93b9_4.json b/421m3b93b9/evaluation/rankeval/421m3b93b9_4.json new file mode 100644 index 0000000000000000000000000000000000000000..38a7d991d395a01563326bfe3cbbbd8848512100 --- /dev/null +++ b/421m3b93b9/evaluation/rankeval/421m3b93b9_4.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.334, + "acc_stderr": 0.014922019523732968 + }, + "anli_r2": { + "acc": 0.336, + "acc_stderr": 0.014944140233795025 + }, + "anli_r3": { + "acc": 0.32916666666666666, + "acc_stderr": 0.013570806258433625 + }, + "cb": { + "acc": 0.44642857142857145, + "acc_stderr": 0.06703189227942398, + "f1": 0.3057909604519774 + }, + "copa": { + "acc": 0.61, + "acc_stderr": 0.04902071300001974 + }, + "hellaswag": { + "acc": 0.28141804421429994, + "acc_stderr": 0.004487718843330278, + "acc_norm": 0.2943636725751842, + "acc_norm_stderr": 0.004548247487546321 + }, + "rte": { + "acc": 0.45126353790613716, + "acc_stderr": 0.029953149241808946 + }, + "winogrande": { + "acc": 0.5090765588003157, + "acc_stderr": 0.0140501700944977 + }, + "storycloze_2016": { + "acc": 0.5569214323890967, + "acc_stderr": 0.011487262197727749 + }, + "boolq": { + "acc": 0.5935779816513761, + "acc_stderr": 0.00859053170888219 + }, + "arc_easy": { + "acc": 0.38341750841750843, + "acc_stderr": 0.009976995068264723, + "acc_norm": 0.3611111111111111, + "acc_norm_stderr": 0.009856013425811242 + }, + "arc_challenge": { + "acc": 0.18600682593856654, + "acc_stderr": 0.011370940183266742, + "acc_norm": 0.2363481228668942, + "acc_norm_stderr": 0.012414960524301818 + }, + "sciq": { + "acc": 0.672, + "acc_stderr": 0.014853842487270333, + "acc_norm": 0.621, + "acc_norm_stderr": 0.01534909100222535 + }, + "piqa": { + "acc": 0.6354733405875952, + "acc_stderr": 0.011229456510295966, + "acc_norm": 0.6262241566920566, + "acc_norm_stderr": 0.011287972563201017 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 
0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/421m3b93b9/evaluation/rankeval/421m3b93b9_5.csv b/421m3b93b9/evaluation/rankeval/421m3b93b9_5.csv new file mode 100644 index 0000000000000000000000000000000000000000..a02c769b38953cb6626556731997471b746c5d40 --- /dev/null +++ b/421m3b93b9/evaluation/rankeval/421m3b93b9_5.csv @@ -0,0 +1,21 @@ +task,metric,value,err,version +anli_r1,acc,0.342,0.015008706182121726,0 +anli_r2,acc,0.328,0.014853842487270336,0 +anli_r3,acc,0.3566666666666667,0.013833742805050717,0 +arc_challenge,acc,0.17406143344709898,0.011080177129482232,0 +arc_challenge,acc_norm,0.22696245733788395,0.012240491536132861,0 +arc_easy,acc,0.38762626262626265,0.009997307914447608,0 +arc_easy,acc_norm,0.3648989898989899,0.009878157021155649,0 +boolq,acc,0.5856269113149847,0.00861586377642113,1 +cb,acc,0.48214285714285715,0.06737697508644648,1 +cb,f1,0.33660130718954245,,1 +copa,acc,0.6,0.049236596391733084,0 +hellaswag,acc,0.28291177056363276,0.004494934025462339,0 +hellaswag,acc_norm,0.29595698068113924,0.004555388371756654,0 +piqa,acc,0.6294885745375408,0.011267826475447667,0 +piqa,acc_norm,0.6251360174102285,0.011294565805619015,0 +rte,acc,0.5234657039711191,0.03006330041190266,0 +sciq,acc,0.657,0.015019206922356953,0 +sciq,acc_norm,0.615,0.01539519444541081,0 +storycloze_2016,acc,0.5558524853019775,0.011490067784518672,0 +winogrande,acc,0.5209155485398579,0.014040185494212943,0 diff --git a/421m3b93b9/evaluation/rankeval/421m3b93b9_5.json b/421m3b93b9/evaluation/rankeval/421m3b93b9_5.json new file mode 100644 index 0000000000000000000000000000000000000000..004cf5530e213d7fb6ae6f71e16d00a4cc6426c5 --- /dev/null +++ b/421m3b93b9/evaluation/rankeval/421m3b93b9_5.json @@ -0,0 +1,87 @@ +{ + "results": { + "anli_r1": { + "acc": 0.342, + "acc_stderr": 0.015008706182121726 + }, + 
"anli_r2": { + "acc": 0.328, + "acc_stderr": 0.014853842487270336 + }, + "anli_r3": { + "acc": 0.3566666666666667, + "acc_stderr": 0.013833742805050717 + }, + "cb": { + "acc": 0.48214285714285715, + "acc_stderr": 0.06737697508644648, + "f1": 0.33660130718954245 + }, + "copa": { + "acc": 0.6, + "acc_stderr": 0.049236596391733084 + }, + "hellaswag": { + "acc": 0.28291177056363276, + "acc_stderr": 0.004494934025462339, + "acc_norm": 0.29595698068113924, + "acc_norm_stderr": 0.004555388371756654 + }, + "rte": { + "acc": 0.5234657039711191, + "acc_stderr": 0.03006330041190266 + }, + "winogrande": { + "acc": 0.5209155485398579, + "acc_stderr": 0.014040185494212943 + }, + "storycloze_2016": { + "acc": 0.5558524853019775, + "acc_stderr": 0.011490067784518672 + }, + "boolq": { + "acc": 0.5856269113149847, + "acc_stderr": 0.00861586377642113 + }, + "arc_easy": { + "acc": 0.38762626262626265, + "acc_stderr": 0.009997307914447608, + "acc_norm": 0.3648989898989899, + "acc_norm_stderr": 0.009878157021155649 + }, + "arc_challenge": { + "acc": 0.17406143344709898, + "acc_stderr": 0.011080177129482232, + "acc_norm": 0.22696245733788395, + "acc_norm_stderr": 0.012240491536132861 + }, + "sciq": { + "acc": 0.657, + "acc_stderr": 0.015019206922356953, + "acc_norm": 0.615, + "acc_norm_stderr": 0.01539519444541081 + }, + "piqa": { + "acc": 0.6294885745375408, + "acc_stderr": 0.011267826475447667, + "acc_norm": 0.6251360174102285, + "acc_norm_stderr": 0.011294565805619015 + } + }, + "versions": { + "anli_r1": 0, + "anli_r2": 0, + "anli_r3": 0, + "cb": 1, + "copa": 0, + "hellaswag": 0, + "rte": 0, + "winogrande": 0, + "storycloze_2016": 0, + "boolq": 1, + "arc_easy": 0, + "arc_challenge": 0, + "sciq": 0, + "piqa": 0 + } +} \ No newline at end of file diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..be6228c0fd1de4c852dc234c6299b83808fb06bc --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0cc1f18b4c3fe84f9e164f0f92f35b69daad89fd3baa274b098b0dcade585ee +size 78980887 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..50eb315fa3404d603819bf249cc0ba853d5f1f1c --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df0895112186c6731e1f165342bbd9f91d8aa4c20511ee0d6a3a4a2ae45dabd3 +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..afb05fa59b1527c21efd511de08fc30d434b5ed5 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8493a06b1f3963823e8708c8ea17cb1135b196ebe989f48a76339f0f3e15640c +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7dd98969c8488b634167fabcc6f0a52996c285c --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02a9fbc0320871a8afc4e838560d36e29282a0baae6efbbf692fbd2cb8f6aa17 +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt 
b/421m3b93b9/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f919dafce6256528f45504a2c013201a3bf41ae9 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac27aeadec8cee153df97ec349ff8cd7a68dd10d489910adf96ad01d197dc0dc +size 78980898 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..12241e9ab1d5d84f495ec77ba0d9a494e3abc387 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74448513e4b09c35d004642db15f54d2d0082eff510755308c25e380e4045bc9 +size 78980898 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..57aa78e51970343c7f4767d3dd3f674b1c439bdd --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcbd438e3f9001d700a6e59a7ae3a09da1bca4dac9454deb98b4ea66e9562dcb +size 78980898 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5393ba5df82b629eca02332deaa4a4fcd72a3982 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:163951ce4171ed68672a2729ba01eed363d27551afe560243a1e032583957a9e +size 78980834 
diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..21c4320d699464f21da6c0c5eb3fddc6a5fd5aff --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cab70722ee80d5a5fefd208c3bb414d2f54e6799bae27b714c074acf782cf690 +size 78981090 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..01b61c9e40724bc490c42cf63bf3b9b04f3bae46 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a1c655f73145412378c7d3e0afa796c5f53631ab7e70fa4e1f10e3d05dd85f +size 78981026 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ffc20020cfc0adefea90a460d930f9e5d214448 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a315d0829d7dc87b2fac3357926d9e65f439eb464a6ad5d00180715825b94cc7 +size 78981026 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c22177e3bc92994f9ea7af90635bc258a962c32 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:192feb92b204e25ec8a9ed3ee8a88502637d6536a87f4e7282cf5e798603646d +size 78980951 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d6f259cb9cd1bbb0b2a3e195c215447b783e529 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df127f56f20880bba800ed57837512658ac90608308bd94c692231acac5dede0 +size 78980834 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..89a6e4846cbaf5802f1af5cf5b1a9509e9506684 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d578a39818721f3b853280e3ddb7b9222279662952461ba81c1e25182ba69ca2 +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c0bfb2a017ce5348f9a26daeb1ab336cb3f2bfe --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d03824dcd2002923e1e03cd30289e33952e9703ec759e329f0a9047711d472e2 +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa1a2e3cc71f8efe6de24c1e18085fc990a4d8d0 --- /dev/null +++ 
b/421m3b93b9/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8d98eb7ca3de6717638dc9274237340186a09e1a7420605be72c1e083ffa64f +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d4590853bdedf170b4d367d560d9a33273529e1 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b818e407bcdf37b93558398957e0585bf3680f4252df36664e5a0f473a62fa54 +size 78981026 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2bf4b7af682ba24ba438e97022fb3fa094b3a3b --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdadf10f3b0480dbd5e74c1f2488b3aec8022521c47aa6ee74279a13347b7f64 +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d86553755432313aa07cba204f32401a1dbae7de --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:951798c71124f305c34ad46c060cc6d31cc4c0df4fe34d1f7ce07f51ddc1c740 +size 78981026 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..8738ba023377a750769fdda3757b5ad4c0956caf --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f95140adb6ee464be5be24fccc3ade118bf25fd89511f89636a47088b42e90f +size 78980834 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..97347795200c9ae38e7fef39e844573ad42f18f0 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40f868cd668b87f09bf2ec017b4ab3879768fb215d556311150681aeb2bfc90d +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8bef752f1d2dcce9016519cb6fd5a3f13188ba81 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ebfdc0201effe209c65d98b8adf6693bf8d99915774c1085168197982ef552d +size 78981026 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba3e36262a24b2b8e9c978951b559edb935f1dcf --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6624c685b814ca95d90693bab21e8b79ecedac57d67ae60864e1aa90589a107 +size 78980887 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt 
b/421m3b93b9/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..456bbac9942a34c4f1caecd396abef7d5f759bcb --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b9f64d001592b0b397f4bc85bfeda14cbd61b82ecc5883f9ed1ee981761c09 +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4b593453169262f9207f14b11e83bbb0d3c3c72 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dbae0cbc20a6aacb2546a11ddfc98a3bf613a5dbff33c9172684904f797bcfe +size 78980898 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e786ad80eee1f1172909103e1ed6016329fa703 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e19db55fa023a0299e5ec9c2c0553a2164d5bc5ba21414190a5430a9e368105 +size 78980834 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..90c59fdeaa4d6bb2f74e319c18bcd607443c1fa9 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e13fb0d83de81595cfd0a7b085c179d05d618f69abf3c455c4d49f66c6136cb +size 78981090 
diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a7504b4ccecd28d6b9f20f6f6be63c9fc74d85f --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4776f01281f40bb268f23b4ceb277deeb9ec94ce727bb3f471dce0cf7ce31245 +size 78980834 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d92c51be51d30aa42a178513e7425b9a20202937 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9e71f6992e98e08a35409f69e7e91dac4b3096fcdfb980196744c8eee22f0d8 +size 78981026 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e4f6ebdbba4c4db14c98fdbbbef59b72d5861fb --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53706886097691e1565861c129de32a43b212df80da8c11baf181e3097062395 +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d93cb3f1652b20a460ee4959076112255afc618c --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:6ccf305f363a56a52814c09978e80df76c6112fdb13c8629e48988764ab333cc +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..12504d11bc076b1717534d35be7171244a9ecb49 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3938ef44e7e0438859a004ab32674c3a6d7e2ab38e37f839e07a449d51d05058 +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f93144bdd90d0e6e859499074c59583a02c374a0 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:700bfab8864f0be546dab29df06a6d1bae943028a0ac05b704b471a4cd6f6ef9 +size 78980898 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c1ad06cb3c01ce06b50ffbcff3704cef59704b1 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfa286c76fa0f922ab23f17e19d451280292f3387ab33128dc4af11e16ca408a +size 78980887 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..20c04a57e7f381c6340b48cf50ef635e3c47227d --- /dev/null +++ 
b/421m3b93b9/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15b7a4584f2578fb6b42e9f430c39f59ac7b1b51f69527dfe8c593b6a59530ab +size 78981090 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e6dae8b465ddfc61f8973c59e3b72da6398ffd3 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe879954e57b2375ff9f98d96fa71e24ab63f21340f201312dec024ca6ea03be +size 78980834 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4fe253b5475aa5edde115c09c6bf19326bc76df --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc23da6dd4cd99838ab706b5e561525d67fad7708eee38673d9e5ac1800b36cc +size 78981026 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f085bf1768cb699265023db15f07e3a329c04e45 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de3ade9b7dba457603cf78214c828b1e9bcf39d8143d09027ca0be0ea04da21 +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..1358156a94ba750d9f4f5f78bd8b93237586d32c --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97c1b8d38e60b8fb94e83be898ce7578635cb3553fa7638f4eecfa2cc523335a +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..89acf49a9538a631fc307ade7425be5822517dd5 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7df867fca07e5acb0a2f417c2b153f85af736869d472619a0f53ed99d0865539 +size 78981090 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc753e23256f1db3d96f62b1a83e5ce076fed8c2 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e0425ecce720f8df9617d47a90d1819b145ffa067261ed7fef758453610fb9d +size 78980898 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..35c5832b7ff72dfff4c70e6722b525ebc15bcae7 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80ca5c9da267dd96b8d92c2c179ad395f54620e949e3ceff109273ba3b314ba7 +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt 
b/421m3b93b9/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..449be2be4a6c2c4ce2ebbc3757ad70ec013a1bc2 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b7a4c38a2220c6d078840f8036be3e0f097588aea98067eb18877b89093d34d +size 78980770 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6419b7ab04b7aa2b17ba8d0dfd8ad3e561591c76 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e19a9a7f0ad80f86872aa9591c515b813738825650e0d83cb56e781b5c52f10 +size 78981026 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2d0fa31451eb5fdd0346a47e5146780a11b93b9 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7db42fba33cbd1e1f31c798c7674bc3ae7170cd4ebc6e539d4537f5573168f77 +size 78980951 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6eedcdec0bb36e03858575c8d1318ed4282a0dff --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e44f532444508ee72f7962c3cb7169b612224d05a4c02a9e864eb91925bf7a2 +size 78980962 
diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e1eaf6ef590ec95bcfe8fa482d4638993e4b3bf --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9554edfbcc8cf72aa97c304dbcf7a770b4e98b184bce65102de895537f28dc39 +size 78981026 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..70e9050c285a526e80c6c887268b58c1465dcb2b --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76bd73c0ad13565f7e08234db997882b147b6320c976f44f2a1a4cc1a4294762 +size 78980898 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bac939d65a4f44541979518bae224bb13b5f4c81 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adae43acea100f9cb61c00a7ded0595a1e9d38a963a296e574db005849b723e2 +size 78980898 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7619d104507319a6aa05264fe838dca381e8a152 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:eb662f8374bba271e8d5f593ebe94e17bdb35aed059de5104e46a3b451087f2f +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..044e6d7607492cf6998121b9b88a5b737fbb2531 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef9480d5d553ed04cbe7cb09f7bcf95c397c3c94019c86ddc16600c1c1054461 +size 78980898 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c619b83bebe9caa07ce4729dd23f7ed9e0b3bc91 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d60804792f0d537cd9864ad11eb1b5e31d54407d4fc5ba48ac5653e014d2477 +size 78981090 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9dd2a4078589a68066b7edb0842a2d63d92b6d1 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1650a805a2601fb1d75027204185c25f96f73caffe43ef98ae8eeb280454d5f6 +size 78980898 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8e13a71d9e176a13921bedce5198931064c1310 --- /dev/null +++ 
b/421m3b93b9/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:193f65e1716c45b7b31de1e9a29227e2324429ba23f667bf8d7b242db7307b96 +size 78981026 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c9c477d4788668a96d5545c0bc0445e3cda67d8 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:056ed4b8990b5b88ca4cea03f6402f22094e8472f430c6f42782c226daf79c3a +size 78980898 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..53354744a11dfe208d1cc3ac5294aa1753f8d99f --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0bdd6417034174fd87b09803c4c2deec6aa2b339791ba1d7d6581342ee92c1 +size 78980887 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d876e4e0ba3ee293afa1a6285f719ba490c6812 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b08cafe5ab15584191b533435a7c85399d3c86bbe8cc5fe2529f3f0876f664f +size 78980962 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..663a3c91527b26bd93dcbb12b79f336f50906ad5 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26f50748cb33c1b406b00a944566720393e49f8550d87431c4a59264fe7006bb +size 78981090 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..250a7c067d4e3361fe658c766c80dddd5478fb24 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e54a067ad816e15549bd3cf4d865a0c5f393b08f691a996fbd882ecf9f871b78 +size 78980834 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1d7afbe9e0ca9cc6716c317ae8fc7a2ca022f9d --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97daf22e29b062b97967545c78a04bedd48b25de8be000207b3b796e61aa2457 +size 78980898 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..97c11a533221591d11acd5c0ecfd363ffce23d50 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e653ff104ca5f182366fd7f4b47f667800827bd2abe2df3d0300db46279ee083 +size 78980823 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt 
b/421m3b93b9/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8fe9e9ff7e6b3133e17e4c73b2309772c051651 --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:048ec6267496f8d85ecc2f083f230851c2be7484ce1c744f41ca90d274d670ae +size 78980887 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6f3e7c614cc3c5479713f63937318d38d8411ed --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90180e67ed911d0aced086ea1620106f01ed86a0fcacc406e3f34929be95c8fb +size 78980951 diff --git a/421m3b93b9/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/421m3b93b9/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..71943d6dc81d6dc88d4c881e8bd21023cfe95f1d --- /dev/null +++ b/421m3b93b9/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02a2f3bb3c20b0bce337c00c50a7962ffbfbc9dc034f7b9d47bc9b28aae573c2 +size 78980887 diff --git a/421m3b93b9/global_step7508/layer_01-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_01-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8b6268006992aa13985fe5d754cf7ba1a28d441 --- /dev/null +++ b/421m3b93b9/global_step7508/layer_01-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a382462ab2d66971aacaa20b763b7e041c46fb154b55dd06922903a85843313 +size 134022403 diff --git 
a/421m3b93b9/global_step7508/layer_03-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_03-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a5c01b80f624577fbe774672aef58a9c23a54d7 --- /dev/null +++ b/421m3b93b9/global_step7508/layer_03-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b68365482b11cbc30d883601ef279147cb84b927f377c65dc76451e1574e9934 +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_04-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_04-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4988b6c395affaa7bb4d0ae204952638b3220f0f --- /dev/null +++ b/421m3b93b9/global_step7508/layer_04-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e223717dcdb62cb03aa8b155b645abda46360369ba3f6d6d0c8feee615c4acc +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_05-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_05-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b88ec5483ddd91050fc7c0ad8ca3f8dcb3fdf3c5 --- /dev/null +++ b/421m3b93b9/global_step7508/layer_05-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:667f41d05f4f88396ebb0ead0f0b66bfa1ca4bb2628b33dfcc5345d9301fc233 +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_06-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_06-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..80e4e820a129c05c6bf5f4b2ee62bef708d2e8d5 --- /dev/null +++ b/421m3b93b9/global_step7508/layer_06-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59a63df4dc588176cf9bdbaadce679b5637f848e958e6146b02b72581c46e20b +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_07-model_00-model_states.pt 
b/421m3b93b9/global_step7508/layer_07-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5df91a60f1f980ee0f85bd475779ad4a29ef2fb --- /dev/null +++ b/421m3b93b9/global_step7508/layer_07-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac1834d15dd38382905aff6fb7233d4e537e456357bf5cf766659e657bcae4c +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_08-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_08-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..59219a863f6bf25bf110faeea6775a388f970a2f --- /dev/null +++ b/421m3b93b9/global_step7508/layer_08-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec8b40368d343ac0820785a28d944e699c438b28037c52bdee3ea933326222f4 +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_09-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_09-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..43c9502b71bb826b38d2ead14042005639f8db39 --- /dev/null +++ b/421m3b93b9/global_step7508/layer_09-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b465334a932721fc5474696dfa77d63bb3ea08a0d01d4526161ba25fcc15be +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_10-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_10-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..43f37a8f46efa42c670a7366eeb2b390c841d40c --- /dev/null +++ b/421m3b93b9/global_step7508/layer_10-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7312ff9a4c4bde20e76c5e4007029fcacf7a63653b3711c672f3e9791c1509a0 +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_11-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_11-model_00-model_states.pt new file mode 
100644 index 0000000000000000000000000000000000000000..9ac4584019549bd2627085b170393a8c3f2570f4 --- /dev/null +++ b/421m3b93b9/global_step7508/layer_11-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bd68ec184b3c0ff1c0bb397ab65b67194f941f76a31f887aca670f0702a7b46 +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_12-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_12-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fa80a75a84233f79128619ebfd924adde100cfc --- /dev/null +++ b/421m3b93b9/global_step7508/layer_12-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d218db39cab69fa58c9895d492fe2903aa31150374ec08599b164886444113 +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_13-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_13-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..108729d2c052e49a6bfdc0ba5c0040241e8ca084 --- /dev/null +++ b/421m3b93b9/global_step7508/layer_13-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa89472edde479d32907ed1617da7c9da8d30fcee472de3fe25de563d43223a +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_14-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_14-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9123ce23e5243b82eb532edf3f649b5cfa2aff9b --- /dev/null +++ b/421m3b93b9/global_step7508/layer_14-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0aba517a7843d78567826850ba0e01ef93818d0ee7568e79dfd5d9c21dcf81b +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_15-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_15-model_00-model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..10c31c2b47f28dbc325be28af8de96b7da44eb18 --- /dev/null +++ b/421m3b93b9/global_step7508/layer_15-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86c1df9aeebaffdfe21542ecc7b7267c4f7f6c562953ced7b4323f82982d17c5 +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_16-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_16-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e6c7eab3dc184a8b0c43675acd7ff78a9223ebc --- /dev/null +++ b/421m3b93b9/global_step7508/layer_16-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa72139c383298ec665f75972436cf079208acb903be71d4a0e289ddc0bbe426 +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_17-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_17-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..00a29ea7b5d0e4ef293e364f14adc09e65d93e4e --- /dev/null +++ b/421m3b93b9/global_step7508/layer_17-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50f41e29499183853d20412eee0bf1a4cc3d469f711a70c09ced3c06748905c4 +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_18-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_18-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..43fd949ab47dd120ebe7b4b1521b75ea8f7e4d20 --- /dev/null +++ b/421m3b93b9/global_step7508/layer_18-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e612b4508fdfe6007c1efe9f55e50ba7bd2bf086721fbbbd0fe7a8e180eca9d +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_19-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_19-model_00-model_states.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d8be80cf6ec13fdac518149aea356d739d56d6da --- /dev/null +++ b/421m3b93b9/global_step7508/layer_19-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72fd9c077952d43fa7c80bf4d5c0ea0bf60805aa89dd08f5657b1db4a1066548 +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_20-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_20-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba5fd97987b065a672aae8a1c7acb32516b9d4d5 --- /dev/null +++ b/421m3b93b9/global_step7508/layer_20-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892452a756d1225e8549eb0c7862cde60ec502d073422983e2b564caff9d002c +size 39359235 diff --git a/421m3b93b9/global_step7508/layer_22-model_00-model_states.pt b/421m3b93b9/global_step7508/layer_22-model_00-model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..07752b2ac974e04357855d9dd1f329fc2ff835a3 --- /dev/null +++ b/421m3b93b9/global_step7508/layer_22-model_00-model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70332a13fa3b0660c5b474c9253c771bb47f129f17a746c0f07924b2dd414dc3 +size 6339 diff --git a/421m3b93b9/global_step7508/mp_rank_00_model_states.pt b/421m3b93b9/global_step7508/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5022734daa9885f2ef723bc3545c6647ebddc02 --- /dev/null +++ b/421m3b93b9/global_step7508/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a337d36d5ed1cc01115b0560ca0cd6685ce3d815b285fe60c3e5612f82eab30a +size 37747 diff --git a/421m3b93b9/logs/2821215.err b/421m3b93b9/logs/2821215.err new file mode 100644 index 0000000000000000000000000000000000000000..ac38912b5afac63442239173e242a1bd4df0fb4d --- /dev/null +++ b/421m3b93b9/logs/2821215.err @@ -0,0 +1,1121 @@ +6: 2023-02-10 
00:15:21.326230: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-10 00:15:21.326262: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-10 00:15:21.325994: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-10 00:15:21.325986: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-10 00:15:21.326002: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: 2023-02-10 00:15:21.326282: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-10 00:15:21.326306: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-10 00:15:21.326598: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-10 00:15:21.326615: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-10 00:15:21.326639: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: 2023-02-10 00:15:21.326028: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+1: 2023-02-10 00:15:21.326035: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-10 00:15:21.326330: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-10 00:15:21.326649: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-10 00:15:21.326047: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +1: 2023-02-10 00:15:21.326049: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+6: 2023-02-10 00:15:21.326341: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-10 00:15:21.326658: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-10 00:15:21.326672: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-10 00:15:21.326684: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: 2023-02-10 00:15:21.326044: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +1: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-10 00:15:21.326326: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+6: 2023-02-10 00:15:21.326343: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +6: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +7: 2023-02-10 00:15:21.326684: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +7: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-10 00:15:21.327216: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-10 00:15:21.327230: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-10 00:15:21.327231: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+3: 2023-02-10 00:15:21.327261: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-10 00:15:21.327278: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-10 00:15:21.327280: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-10 00:15:21.327261: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +3: 2023-02-10 00:15:21.327300: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +3: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+5: 2023-02-10 00:15:21.327440: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-10 00:15:21.327448: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-10 00:15:21.327479: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-10 00:15:21.327508: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-10 00:15:21.327521: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+5: 2023-02-10 00:15:21.327530: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-10 00:15:21.327549: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +5: 2023-02-10 00:15:21.327567: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +5: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-10 00:15:21.327777: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-10 00:15:21.327786: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+2: 2023-02-10 00:15:21.327815: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-10 00:15:21.327832: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-10 00:15:21.327868: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-10 00:15:21.327883: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +2: 2023-02-10 00:15:21.327888: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+2: 2023-02-10 00:15:21.327899: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +2: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-10 00:15:21.328045: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-10 00:15:21.328054: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-10 00:15:21.328071: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-10 00:15:21.328093: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+4: 2023-02-10 00:15:21.328121: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-10 00:15:21.328141: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-10 00:15:21.328140: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +4: 2023-02-10 00:15:21.328186: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +4: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-10 00:15:21.374735: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+0: 2023-02-10 00:15:21.374745: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-10 00:15:21.374735: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-10 00:15:21.374746: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-10 00:15:21.374750: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-10 00:15:21.374752: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. 
+0: 2023-02-10 00:15:21.374766: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +0: 2023-02-10 00:15:21.374764: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA +0: To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags. +6: 2023-02-10 00:15:33.382916: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-10 00:15:33.382517: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:33.382813: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-10 00:15:33.382851: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 2023-02-10 00:15:33.382936: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-10 00:15:33.382893: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-10 00:15:33.382547: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 
'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-02-10 00:15:33.383086: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:33.382884: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-10 00:15:33.383026: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:33.382831: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-10 00:15:33.382871: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:33.382945: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-10 00:15:33.382911: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-10 00:15:33.382567: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-02-10 00:15:33.383115: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:33.382918: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:33.382849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-10 00:15:33.382878: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:33.383239: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+6: 2023-02-10 00:15:33.382973: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-10 00:15:33.382922: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-10 00:15:33.382600: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-02-10 00:15:33.383120: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:33.383209: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-10 00:15:33.383223: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+1: 2023-02-10 00:15:33.382954: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-10 00:15:33.383051: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:33.382861: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-10 00:15:33.382895: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:33.383252: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+6: 2023-02-10 00:15:33.382980: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-10 00:15:33.382936: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-10 00:15:33.382592: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-02-10 00:15:33.383106: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:33.382995: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:33.382875: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-10 00:15:33.382906: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:33.383268: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-10 00:15:33.382971: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-10 00:15:33.382939: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-10 00:15:33.382612: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-02-10 00:15:33.383127: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:33.382965: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-10 00:15:33.383069: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:33.382884: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 2023-02-10 00:15:33.383231: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:33.382999: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-10 00:15:33.382949: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-10 00:15:33.382616: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-02-10 00:15:33.383127: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:33.382890: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-10 00:15:33.383000: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-10 00:15:33.383085: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:33.382890: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:33.383003: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 2023-02-10 00:15:33.383014: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +2: 2023-02-10 00:15:33.382607: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-02-10 00:15:33.383131: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:33.382903: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-10 00:15:33.383003: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +6: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-10 00:15:33.383101: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-10 00:15:33.383101: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +2: 2023-02-10 00:15:33.383126: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+2: 2023-02-10 00:15:33.383134: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:33.382879: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:33.382962: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] 
Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +5: 2023-02-10 00:15:33.383144: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +4: 
0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:33.382903: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +1: 2023-02-10 00:15:33.382992: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +7: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:33.383246: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+1: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:33.383289: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-10 00:15:33.383290: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-10 00:15:33.383301: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-10 00:15:33.383301: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-10 00:15:33.383250: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-10 00:15:33.383256: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-10 00:15:33.383257: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +3: 2023-02-10 00:15:33.383259: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +1: 2023-02-10 00:15:33.383312: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-02-10 00:15:33.386830: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-10 00:15:33.386866: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-10 00:15:33.387267: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-02-10 00:15:33.386885: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-10 00:15:33.387299: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-02-10 00:15:33.386919: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-10 00:15:33.386942: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-10 00:15:33.386959: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-10 00:15:33.386964: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-10 00:15:33.387331: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-02-10 00:15:33.386984: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_46200 +0: 0125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-10 00:15:33.387358: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-10 00:15:33.387369: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-10 00:15:33.387377: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +0: 2023-02-10 00:15:33.387394: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+0: 2023-02-10 00:15:33.387398: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-10 00:15:33.392266: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-10 00:15:33.392296: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-10 00:15:33.392310: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-10 00:15:33.392325: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-10 00:15:33.392337: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-10 00:15:33.392339: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-10 00:15:33.392361: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +6: 2023-02-10 00:15:33.392367: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-10 00:15:33.392213: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-10 00:15:33.392482: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+4: 2023-02-10 00:15:33.392241: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-10 00:15:33.392275: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-10 00:15:33.392287: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-10 00:15:33.392301: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-10 00:15:33.392323: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-10 00:15:33.392329: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +4: 2023-02-10 00:15:33.392332: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-10 00:15:33.392784: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-10 00:15:33.392815: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-10 00:15:33.392512: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-10 00:15:33.392845: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+7: 2023-02-10 00:15:33.392529: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-10 00:15:33.392540: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-10 00:15:33.392573: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-10 00:15:33.392581: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-10 00:15:33.392882: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-10 00:15:33.392591: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-10 00:15:33.392896: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-10 00:15:33.392898: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-10 00:15:33.392901: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +7: 2023-02-10 00:15:33.392622: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. +5: 2023-02-10 00:15:33.392910: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine. 
+6: 2023-02-10 00:15:55.034227: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.034267: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.034297: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.034279: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.034301: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.034304: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.034312: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.034333: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +6: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:55.042568: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.042797: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-10 00:15:55.042804: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 2023-02-10 00:15:55.042770: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-10 00:15:55.042887: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-10 00:15:55.042599: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-10 00:15:55.042831: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic 
library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-10 00:15:55.042812: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 2023-02-10 00:15:55.042794: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-10 00:15:55.042909: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-10 00:15:55.042613: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-10 00:15:55.043082: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: 2023-02-10 00:15:55.042836: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic 
library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-02-10 00:15:55.042833: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.042794: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-10 00:15:55.042915: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 2023-02-10 00:15:55.042637: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.042860: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: 2023-02-10 00:15:55.042846: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-02-10 00:15:55.042861: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-10 00:15:55.043117: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.042813: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-10 00:15:55.042925: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.042893: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: 2023-02-10 00:15:55.042857: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-02-10 00:15:55.042868: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:55.042641: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-10 00:15:55.043135: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.042819: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-10 00:15:55.042931: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.042896: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: 2023-02-10 00:15:55.042882: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-02-10 00:15:55.042885: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:55.042656: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-10 00:15:55.043152: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.042823: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-10 00:15:55.042934: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.042918: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: 2023-02-10 00:15:55.042896: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-02-10 00:15:55.042890: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:55.042658: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-10 00:15:55.043163: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.042940: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-10 00:15:55.043061: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.042954: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: 2023-02-10 00:15:55.042902: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-02-10 00:15:55.042902: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:55.042652: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-10 00:15:55.043176: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +7: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.042945: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +3: 2023-02-10 00:15:55.043072: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +1: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-10 00:15:55.042907: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +5: 2023-02-10 00:15:55.042908: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +4: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-10 00:15:55.043432: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-10 00:15:55.042993: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +0: 2023-02-10 00:15:55.043452: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/project_462000125 +2: 
/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: /samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.046674: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.046678: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.046682: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.046690: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-10 00:15:55.046692: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+4: 2023-02-10 00:15:55.046680: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-10 00:15:55.047027: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.046849: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-10 00:15:55.046685: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.047074: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-10 00:15:55.047028: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.046852: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-10 00:15:55.046687: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-10 00:15:55.047033: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-10 00:15:55.047075: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.046696: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-10 00:15:55.046705: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.047078: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 2023-02-10 00:15:55.046857: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-10 00:15:55.046706: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +4: 2023-02-10 00:15:55.046709: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: 2023-02-10 00:15:55.047032: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.046743: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.047081: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.046757: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+1: 2023-02-10 00:15:55.047002: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-10 00:15:55.047033: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.046859: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-10 00:15:55.047077: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +4: 2023-02-10 00:15:55.046765: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.047084: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +4: 2023-02-10 00:15:55.046781: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+0: 2023-02-10 00:15:55.047036: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.046859: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-10 00:15:55.047079: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 
ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.047081: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 
00:15:55.047007: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-10 00:15:55.047039: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.046862: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: 2023-02-10 00:15:55.047079: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-10 00:15:55.047079: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open 
shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.047082: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.046871: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:55.047009: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-10 00:15:55.047042: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +7: 2023-02-10 00:15:55.046871: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-10 00:15:55.046874: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-10 00:15:55.046877: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-10 00:15:55.047079: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +0: 2023-02-10 00:15:55.047056: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-10 00:15:55.047056: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-10 00:15:55.047086: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: 2023-02-10 00:15:55.046879: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-10 00:15:55.046879: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+2: 2023-02-10 00:15:55.047081: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:55.047008: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.047091: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-10 00:15:55.047096: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+7: 2023-02-10 00:15:55.046918: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-10 00:15:55.047081: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.047097: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-10 00:15:55.047101: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-10 00:15:55.047103: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-10 00:15:55.047084: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:55.047010: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: 2023-02-10 00:15:55.047102: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +6: 2023-02-10 00:15:55.047106: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+7: 2023-02-10 00:15:55.046921: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-10 00:15:55.047084: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.047148: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +7: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-10 00:15:55.047078: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-10 00:15:55.047092: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+1: 2023-02-10 00:15:55.047013: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +6: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +6: 2023-02-10 00:15:55.047161: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-10 00:15:55.046932: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +7: 2023-02-10 00:15:55.046933: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-10 00:15:55.047089: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-10 00:15:55.047078: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-10 00:15:55.047094: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-10 00:15:55.047096: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+1: 2023-02-10 00:15:55.047011: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +5: 2023-02-10 00:15:55.047099: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-10 00:15:55.047100: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:55.047019: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-10 00:15:55.047024: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-10 00:15:55.047083: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +5: 2023-02-10 00:15:55.047100: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-10 00:15:55.047102: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +5: 2023-02-10 00:15:55.047104: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-10 00:15:55.047025: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-10 00:15:55.047027: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-10 00:15:55.047028: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:55.047030: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +1: 2023-02-10 00:15:55.047033: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-10 00:15:55.047083: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +1: 2023-02-10 00:15:55.047063: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +1: 2023-02-10 00:15:55.047078: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+2: 2023-02-10 00:15:55.047087: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +2: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +2: 2023-02-10 00:15:55.047097: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-10 00:15:55.047100: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-10 00:15:55.047100: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-10 00:15:55.047103: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-10 00:15:55.047105: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-10 00:15:55.047629: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:55.047631: W 
tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:55.047633: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:55.047634: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +0: 2023-02-10 00:15:55.047059: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-10 00:15:55.047062: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-10 00:15:55.047062: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-10 00:15:55.047103: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-10 00:15:55.047103: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +2: 2023-02-10 00:15:55.047105: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-10 00:15:55.047063: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-10 00:15:55.047065: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: 2023-02-10 00:15:55.047065: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. 
+3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:55.047637: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:55.047639: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:55.047640: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:55.047642: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: 
/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/aws-ofi-rccl:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/rccl/rccl-develop-release/rccl/lib:/pfs/lustrep4/projappl/project_462000075/samantao-public/rocm/glibc/selected:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hip/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/hsa/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/llvm:/pfs/lustrep2/projappl/pro +3: ject_462000125/samantao-public/apps/suse-repo-deps/lib64:/pfs/lustrep2/projappl/project_462000125/samantao-public/apps/suse-repo-deps/usr/lib64:/opt/cray/pe/python/3.9.12.1/lib:/opt/cray/pe/gcc-libs:/opt/cray/libfabric/1.15.0.0/lib64 +3: 2023-02-10 00:15:55.047647: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-10 00:15:55.047650: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-10 00:15:55.047650: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-10 00:15:55.047652: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-10 00:15:55.047652: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-10 00:15:55.047654: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-10 00:15:55.047656: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +3: 2023-02-10 00:15:55.047657: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly. +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module scaled_upper_triang_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_upper_triang_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... 
+0: Building extension module scaled_masked_softmax_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module scaled_masked_softmax_cuda... +0: Successfully preprocessed all matching files. +0: Detected CUDA files, patching ldflags +0: Emitting ninja build file /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/build/build.ninja... +0: Building extension module fused_mix_prec_layer_norm_cuda... +0: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +0: Loading extension module fused_mix_prec_layer_norm_cuda... +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +0: Successfully preprocessed all matching files. +5: Successfully preprocessed all matching files. +5: Successfully preprocessed all matching files. 
+0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +4: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +0: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +0: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +7: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +7: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +7: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +2: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +2: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +2: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +6: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +5: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +5: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +1: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +6: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +6: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +3: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +3: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +3: warnings.warn( +1: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +1: warnings.warn( +4: 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +4: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/distributed/distributed_c10d.py:429: UserWarning: torch.distributed.distributed_c10d._get_global_rank is deprecated please use torch.distributed.distributed_c10d.get_global_rank instead +4: warnings.warn( +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: +6: +6: +6: +6: +6: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +2: +2: +2: +2: +2: +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: +7: +7: +7: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: +4: +4: +4: +4: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: +3: +3: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: +1: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Emitting ninja build file /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu/utils/build.ninja... +5: Building extension module utils... +5: Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) +5: Loading extension module utils... +0: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +5: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +0: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +6: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +3: Loading extension module utils... +2: Loading extension module utils... +6: Loading extension module utils... +1: Loading extension module utils... +4: Loading extension module utils... +6: Loading extension module utils... +2: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +1: Loading extension module utils... +6: Loading extension module utils... +2: Loading extension module utils... +6: Loading extension module utils... +4: Loading extension module utils... +1: Loading extension module utils... 
+4: Loading extension module utils... +6: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +4: Loading extension module utils... +2: Loading extension module utils... +1: Loading extension module utils... +4: Loading extension module utils... +2: Loading extension module utils... +4: Loading extension module utils... +3: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +4: Loading extension module utils... +7: Loading extension module utils... +2: Loading extension module utils... +3: Loading extension module utils... +1: Loading extension module utils... +7: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +3: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +7: Loading extension module utils... +3: Loading extension module utils... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+1: +1: +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: +3: +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +7: +7: +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +5: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +4: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root...Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: +3: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +2: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... 
+6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +6: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +3: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +0: +0: +0: Loading extension module utils...Loading extension module utils... +0: +3: No modifications detected for re-loaded extension module utils, skipping build step... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +3: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +0: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +0: +0: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +0: +0: +0: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +3: Loading extension module utils... 
+1: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +1: +1: Loading extension module utils...Loading extension module utils... +1: +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step... +1: Loading extension module utils... +1: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +1: +1: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils...Loading extension module utils... +1: +1: +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +1: Loading extension module utils... +3: No modifications detected for re-loaded extension module utils, skipping build step... +3: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... 
+6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +7: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +2: No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +2: +2: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +2: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +2: Loading extension module utils... +2: +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: Loading extension module utils... +7: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step... 
+6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +2: Loading extension module utils... +4: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +4: +4: No modifications detected for re-loaded extension module utils, skipping build step... +4: +4: Loading extension module utils... +4: Loading extension module utils...Loading extension module utils... +4: +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +6: No modifications detected for re-loaded extension module utils, skipping build step... +6: Loading extension module utils... +5: Loading extension module utils... +6: No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step...No modifications detected for re-loaded extension module utils, skipping build step... +6: +6: No modifications detected for re-loaded extension module utils, skipping build step...Loading extension module utils... +6: Loading extension module utils... +7: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +7: No modifications detected for re-loaded extension module utils, skipping build step... +5: No modifications detected for re-loaded extension module utils, skipping build step... 
+5: Loading extension module utils...No modifications detected for re-loaded extension module utils, skipping build step... +6: +6: +6: Loading extension module utils...Loading extension module utils... +6: +7: Loading extension module utils... +5: +5: Loading extension module utils... +5: No modifications detected for re-loaded extension module utils, skipping build step... +5: Loading extension module utils... +0: Using /pfs/lustrep4/users/muennighoff/.cache/torch_extensions/py39_cpu as PyTorch extensions root... +0: No modifications detected for re-loaded extension module utils, skipping build step... +0: Loading extension module utils... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/utils.py:349: UserWarning: Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings +0: warnings.warn("Parameter count with the embeddings will be inaccurate with PP > 1, as the first and last stage hold several copies of the embeddings") diff --git a/421m3b93b9/logs/2821215.out b/421m3b93b9/logs/2821215.out new file mode 100644 index 0000000000000000000000000000000000000000..c8cef2a2bbcd2163a8564ad05245702b716b0927 --- /dev/null +++ b/421m3b93b9/logs/2821215.out @@ -0,0 +1,6435 @@ +Model parameters: d_model 1280 ffw_size 5120 kv_size 128 n_heads 10 n_layers 18 +Megatron-DeepSpeed/pretrain_gpt.py --tensor-model-parallel-size 1 --pipeline-model-parallel-size 1 --num-layers 18 --hidden-size 1280 --num-attention-heads 10 --kv-channels 128 --ffn-hidden-size 5120 --seq-length 2048 --max-position-embeddings 2048 --micro-batch-size 4 --global-batch-size 256 --train-samples 1 --vocab-file gpt2/vocab.json --merge-file gpt2/merges.txt --clip-grad 1.0 --kill-switch-path kill-switch-421m3b93b9val --bf16 --optimizer adam --adam-beta1 0.9 --adam-beta2 0.999 --adam-eps 1e-8 --lr 2e-4 --min-lr 2e-5 --lr-decay-style cosine --lr-decay-samples 1 --lr-warmup-samples 0 
--clip-grad 1.0 --weight-decay 1e-1 --no-load-optim --reset-progress --override-lr-scheduler --log-interval 10 --save-interval 1000 --eval-interval 1 --eval-iters 100 --tensorboard-dir tensorboard_421m3b93b9val --tensorboard-queue-size 5 --log-timers-to-tensorboard --log-batch-size-to-tensorboard --log-validation-ppl-to-tensorboard --save checkpoints_421m3b93b9 --load checkpoints_421m3b93b9 --train-weighted-split-paths-path train3b9.txt --valid-weighted-split-paths-path val.txt --data-impl mmap --deepspeed --deepspeed_config ds_configs/2821215.json --zero-stage 0 +START 2821215: Fri 10 Feb 2023 12:14:38 AM EET +0: +0: +0: ======================= ROCm System Management Interface ======================= +0: ================================= Concise Info ================================= +0: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +0: 0 45.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 1 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 2 46.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 3 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 4 41.0c 88.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: 6 45.0c 82.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +0: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +0: ================================================================================ +0: ============================= End of ROCm SMI Log ============================== +6: +6: +6: ======================= ROCm System Management Interface ======================= +6: ================================= Concise Info ================================= +6: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +6: 0 43.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 2 45.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 3 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 4 44.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 5 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: 6 42.0c 
92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +6: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +6: ================================================================================ +6: ============================= End of ROCm SMI Log ============================== +7: +7: +7: ======================= ROCm System Management Interface ======================= +7: ================================= Concise Info ================================= +7: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +7: 0 46.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 2 44.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 4 48.0c 84.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: 6 45.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +7: 7 42.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +7: ================================================================================ +7: ============================= End of ROCm SMI Log ============================== +5: +5: +5: ======================= ROCm System Management Interface ======================= +5: ================================= Concise Info ================================= +5: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +5: 0 47.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 1 51.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 2 40.0c 89.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 4 47.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: 6 45.0c 85.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +5: 7 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +5: ================================================================================ +5: ============================= End of ROCm SMI Log ============================== +2: +2: +2: ======================= ROCm System Management Interface ======================= 
+2: ================================= Concise Info ================================= +2: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +2: 0 46.0c 95.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 2 42.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 3 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 4 44.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: 6 42.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +2: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +2: ================================================================================ +2: ============================= End of ROCm SMI Log ============================== +4: +4: +4: ======================= ROCm System Management Interface ======================= +4: ================================= Concise Info ================================= +4: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +4: 0 47.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 1 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 2 40.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 3 49.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 4 42.0c 96.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 5 46.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: 6 39.0c 92.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +4: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +4: ================================================================================ +4: ============================= End of ROCm SMI Log ============================== +3: +3: +3: ======================= ROCm System Management Interface ======================= +3: ================================= Concise Info ================================= +3: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +3: 0 47.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 2 43.0c 91.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 3 45.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 4 
44.0c 87.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 5 50.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: 6 40.0c 86.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +3: 7 47.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +3: ================================================================================ +3: ============================= End of ROCm SMI Log ============================== +1: +1: +1: ======================= ROCm System Management Interface ======================= +1: ================================= Concise Info ================================= +1: GPU Temp AvgPwr SCLK MCLK Fan Perf PwrCap VRAM% GPU% +1: 0 47.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 1 48.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 2 40.0c 94.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 3 41.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 4 44.0c 90.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 5 44.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: 6 47.0c 83.0W 800Mhz 1600Mhz 0% auto 560.0W 0% 0% +1: 7 43.0c N/A 800Mhz 1600Mhz 0% auto 0.0W 0% 0% +1: ================================================================================ +1: ============================= End of ROCm SMI Log ============================== +6: Launching on nid007044 (6/8), master nid007038 port 9999, GPUs 8, CUDA: True +2: Launching on nid007040 (2/8), master nid007038 port 9999, GPUs 8, CUDA: True +5: Launching on nid007043 (5/8), master nid007038 port 9999, GPUs 8, CUDA: True +1: Launching on nid007039 (1/8), master nid007038 port 9999, GPUs 8, CUDA: True +4: Launching on nid007042 (4/8), master nid007038 port 9999, GPUs 8, CUDA: True +7: Launching on nid007045 (7/8), master nid007038 port 9999, GPUs 8, CUDA: True +3: Launching on nid007041 (3/8), master nid007038 port 9999, GPUs 8, CUDA: True +0: Launching on nid007038 (0/8), master nid007038 port 9999, GPUs 8, CUDA: True +0: using world size: 64, data-parallel-size: 64, tensor-model-parallel size: 1, pipeline-model-parallel size: 1 +0: accumulate and all-reduce gradients in 
fp32 for bfloat16 data type. +0: using torch.bfloat16 for parameters ... +0: ------------------------ arguments ------------------------ +0: abort_on_unmet_fused_kernel_constraints ......... False +0: accumulate_allreduce_grads_in_fp32 .............. True +0: adam_beta1 ...................................... 0.9 +0: adam_beta2 ...................................... 0.999 +0: adam_eps ........................................ 1e-08 +0: adlr_autoresume ................................. False +0: adlr_autoresume_interval ........................ 1000 +0: apply_query_key_layer_scaling ................... True +0: apply_residual_connection_post_layernorm ........ False +0: attention_dropout ............................... 0.1 +0: attention_softmax_in_fp32 ....................... False +0: bert_binary_head ................................ True +0: bert_load ....................................... None +0: bf16 ............................................ True +0: bias_dropout_fusion ............................. True +0: bias_gelu_fusion ................................ True +0: biencoder_projection_dim ........................ 0 +0: biencoder_shared_query_context_model ............ False +0: block_data_path ................................. None +0: checkpoint_activations .......................... False +0: checkpoint_in_cpu ............................... False +0: checkpoint_num_layers ........................... 1 +0: clip_grad ....................................... 1.0 +0: codecarbon_dir .................................. None +0: consumed_train_samples .......................... 0 +0: consumed_train_tokens ........................... 0 +0: consumed_valid_samples .......................... 0 +0: contigious_checkpointing ........................ False +0: cpu_optimizer ................................... False +0: cpu_torch_adam .................................. False +0: curriculum_learning ............................. 
False +0: data_impl ....................................... mmap +0: data_parallel_size .............................. 64 +0: data_path ....................................... None +0: dataloader_type ................................. single +0: DDP_impl ........................................ local +0: decoder_seq_length .............................. None +0: deepscale ....................................... False +0: deepscale_config ................................ None +0: deepspeed ....................................... True +0: deepspeed_activation_checkpointing .............. False +0: deepspeed_config ................................ ds_configs/2821215.json +0: deepspeed_mpi ................................... False +0: distribute_checkpointed_activations ............. False +0: distributed_backend ............................. nccl +0: embed_layernorm ................................. False +0: embedding_path .................................. None +0: encoder_seq_length .............................. 2048 +0: eod_mask_loss ................................... False +0: eval_interval ................................... 1 +0: eval_iters ...................................... 100 +0: eval_only ....................................... None +0: evidence_data_path .............................. None +0: exit_duration_in_mins ........................... None +0: exit_interval ................................... None +0: ffn_hidden_size ................................. 5120 +0: finetune ........................................ False +0: fp16 ............................................ False +0: fp16_lm_cross_entropy ........................... False +0: fp32_residual_connection ........................ False +0: gigaflos_no_embeds .............................. 0 +0: global_batch_size ............................... 256 +0: glu_activation .................................. None +0: hidden_dropout .................................. 
0.1 +0: hidden_size ..................................... 1280 +0: hysteresis ...................................... 2 +0: ict_head_size ................................... None +0: ict_load ........................................ None +0: img_dim ......................................... 224 +0: indexer_batch_size .............................. 128 +0: indexer_log_interval ............................ 1000 +0: inference ....................................... False +0: init_method_std ................................. 0.02 +0: init_method_xavier_uniform ...................... False +0: initial_loss_scale .............................. 4294967296 +0: kill_switch_path ................................ kill-switch-421m3b93b9val +0: kv_channels ..................................... 128 +0: layer_norm_fusion ............................... True +0: layernorm_epsilon ............................... 1e-05 +0: lazy_mpu_init ................................... None +0: load ............................................ checkpoints_421m3b93b9 +0: local_rank ...................................... None +0: log_batch_size_to_tensorboard ................... True +0: log_interval .................................... 10 +0: log_learning_rate_to_tensorboard ................ True +0: log_level ....................................... None +0: log_level_replica ............................... None +0: log_loss_scale_to_tensorboard ................... True +0: log_num_zeros_in_grad ........................... False +0: log_params_norm ................................. False +0: log_path ........................................ None +0: log_timers_to_tensorboard ....................... True +0: log_validation_ppl_to_tensorboard ............... True +0: loss_on_targets_only ............................ False +0: loss_scale ...................................... None +0: loss_scale_window ............................... 1000 +0: lr .............................................. 
0.0002 +0: lr_decay_iters .................................. None +0: lr_decay_samples ................................ 1 +0: lr_decay_style .................................. cosine +0: lr_decay_tokens ................................. None +0: lr_warmup_fraction .............................. None +0: lr_warmup_iters ................................. 0 +0: lr_warmup_samples ............................... 0 +0: make_vocab_size_divisible_by .................... 128 +0: mask_prob ....................................... 0.15 +0: masked_softmax_fusion ........................... True +0: max_position_embeddings ......................... 2048 +0: mean_noise_span_length .......................... None +0: memory_centric_tiled_linear ..................... False +0: merge_file ...................................... gpt2/merges.txt +0: micro_batch_size ................................ 4 +0: min_loss_scale .................................. 1.0 +0: min_lr .......................................... 2e-05 +0: mmap_warmup ..................................... False +0: no_load_optim ................................... True +0: no_load_rng ..................................... None +0: no_save_optim ................................... None +0: no_save_rng ..................................... None +0: noise_density ................................... None +0: num_attention_heads ............................. 10 +0: num_channels .................................... 3 +0: num_classes ..................................... 1000 +0: num_layers ...................................... 18 +0: num_layers_per_virtual_pipeline_stage ........... None +0: num_workers ..................................... 2 +0: onnx_safe ....................................... None +0: openai_gelu ..................................... False +0: optimizer ....................................... adam +0: optimizer_fusion ................................ 
True +0: override_lr_scheduler ........................... True +0: pad_vocab_size_to ............................... None +0: params_dtype .................................... torch.bfloat16 +0: partition_activations ........................... False +0: patch_dim ....................................... 16 +0: pipeline_model_parallel_size .................... 1 +0: position_embedding_type ......................... PositionEmbeddingType.absolute +0: pp_partition_method ............................. None +0: profile_backward ................................ False +0: query_in_block_prob ............................. 0.1 +0: rampup_batch_size ............................... None +0: rank ............................................ 0 +0: remote_device ................................... none +0: reset_attention_mask ............................ False +0: reset_position_ids .............................. False +0: reset_progress .................................. True +0: retriever_report_topk_accuracies ................ [] +0: retriever_score_scaling ......................... False +0: retriever_seq_length ............................ 256 +0: reweight_loss_based_on_position_frequency ....... False +0: sample_rate ..................................... 1.0 +0: save ............................................ checkpoints_421m3b93b9 +0: save_interval ................................... 1000 +0: scatter_gather_tensors_in_pipeline .............. True +0: scattered_embeddings ............................ False +0: seed ............................................ 1234 +0: seq_length ...................................... 2048 +0: sgd_momentum .................................... 0.9 +0: short_seq_prob .................................. 0.1 +0: skip_train_iteration_range ...................... None +0: split ........................................... None +0: split_transformers .............................. False +0: sync_tp_duplicated_parameters ................... 
False +0: synchronize_each_layer .......................... False +0: tensor_model_parallel_size ...................... 1 +0: tensorboard_dir ................................. tensorboard_421m3b93b9val +0: tensorboard_log_interval ........................ 1 +0: tensorboard_queue_size .......................... 5 +0: test_weighted_split_paths ....................... None +0: test_weighted_split_paths_path .................. None +0: tile_factor ..................................... 1 +0: titles_data_path ................................ None +0: tokenizer_name_or_path .......................... None +0: tokenizer_type .................................. GPT2BPETokenizer +0: train_iters ..................................... None +0: train_samples ................................... 1 +0: train_tokens .................................... None +0: train_weighted_split_names ...................... ['train'] +0: train_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_3B9_text_document']] +0: train_weighted_split_paths_path ................. None +0: train_weighted_split_splits ..................... [['0:1']] +0: train_weighted_split_weights .................... [['1.0']] +0: universal_checkpoint ............................ False +0: use_bnb_optimizer ............................... False +0: use_checkpoint_lr_scheduler ..................... False +0: use_contiguous_buffers_in_ddp ................... True +0: use_cpu_initialization .......................... None +0: use_one_sent_docs ............................... False +0: use_pin_memory .................................. False +0: valid_num_workers ............................... 2 +0: valid_weighted_split_names ...................... ['validation'] +0: valid_weighted_split_paths ...................... [['/scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document']] +0: valid_weighted_split_paths_path ................. 
None +0: valid_weighted_split_splits ..................... [['0:1']] +0: valid_weighted_split_weights .................... [['1.0']] +0: virtual_pipeline_model_parallel_size ............ None +0: vocab_extra_ids ................................. 0 +0: vocab_file ...................................... gpt2/vocab.json +0: weight_decay .................................... 0.1 +0: world_size ...................................... 64 +0: zero_allgather_bucket_size ...................... 0.0 +0: zero_contigious_gradients ....................... False +0: zero_reduce_bucket_size ......................... 0.0 +0: zero_reduce_scatter ............................. False +0: zero_stage ...................................... 0 +0: -------------------- end of arguments --------------------- +0: setting number of micro-batches to constant 1 +0: > building GPT2BPETokenizer tokenizer ... +0: > padded vocab (size: 50257) with 47 dummy tokens (new size: 50304) +0: DeepSpeed general environment info: +0: torch install path ............... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch'] +0: torch version .................... 1.13.0+rocm5.2 +0: torch cuda version ............... None +0: torch hip version ................ 5.2.21151-afdc89f8 +0: nvcc version ..................... None +0: deepspeed install path ........... ['/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/deepspeed'] +0: deepspeed info ................... 0.7.5, unknown, unknown +0: deepspeed wheel compiled w. ...... torch 1.13, hip 5.1 +0: **** Git info for Megatron: git_hash=unknown git_branch=unknown **** +0: > initializing torch distributed ... +0: [2023-02-10 00:16:56,343] [INFO] [comm.py:633:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl +7: > setting tensorboard ... 
+0: > initializing tensor model parallel with size 1 +0: > initializing pipeline model parallel with size 1 +0: > setting random seeds to 1234 ... +0: > initializing model parallel cuda seeds on global rank 0, model parallel rank 0, and data parallel rank 0 with model parallel seed: 3952 and data parallel seed: 1234 +0: > compiling dataset index builder ... +0: make: Entering directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: make: Nothing to be done for 'default'. +0: make: Leaving directory '/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/data' +0: >>> done with dataset index builder. Compilation time: 0.178 seconds +0: > compiling and loading fused kernels ... +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 87 +0: [1/1] c++ scaled_upper_triang_masked_softmax_hip.cuda.o scaled_upper_triang_masked_softmax_hip.o -shared 
-L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o scaled_upper_triang_masked_softmax_cuda.so +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.cpp [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_cuda.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 63 +0: [1/1] c++ scaled_masked_softmax_hip.o scaled_masked_softmax_hip.cuda.o -shared -L/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/venv/lib/python3.9/site-packages/torch/lib -lc10 -lc10_hip -ltorch_cpu -ltorch_hip -ltorch -ltorch_python -L/pfs/lustrep2/projappl/project_462000125/samantao-public/rocm/rocm-5.2.3/lib -lamdhip64 -o scaled_masked_softmax_cuda.so +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda.cpp [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_cuda_kernel.cu -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/layer_norm_hip_kernel.hip [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/type_shim.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/compat.h [skipped, no changes] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax.h -> 
/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_upper_triang_masked_softmax_hip.h [skipped, already hipified] +0: /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax.h -> /pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/Megatron-DeepSpeed/megatron/fused_kernels/scaled_masked_softmax_hip.h [skipped, already hipified] +0: Total number of unsupported CUDA function calls: 0 +0: +0: +0: Total number of replaced kernel launches: 67 +0: ninja: no work to do. +0: >>> done with compiling and loading fused kernels. Compilation time: 19.299 seconds +0: time to initialize megatron (seconds): -17.007 +0: [after megatron is initialized] datetime: 2023-02-10 00:17:18 +0: building GPT model ... +0: [2023-02-10 00:17:19,124] [INFO] [utils.py:827:see_memory_usage] Before Building Model +0: [2023-02-10 00:17:19,125] [INFO] [utils.py:828:see_memory_usage] MA 0.0 GB Max_MA 0.0 GB CA 0.0 GB Max_CA 0 GB +0: [2023-02-10 00:17:19,125] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.17 GB, percent = 6.0% +0: SEED_LAYERS=False BASE_SEED=1234 SEED_FN=None +0: Using topology: {ProcessCoord(pipe=0, data=0, model=0): 0, ProcessCoord(pipe=0, data=1, model=0): 1, ProcessCoord(pipe=0, data=2, model=0): 2, ProcessCoord(pipe=0, data=3, model=0): 3, ProcessCoord(pipe=0, data=4, model=0): 4, ProcessCoord(pipe=0, data=5, model=0): 5, ProcessCoord(pipe=0, data=6, model=0): 6, ProcessCoord(pipe=0, data=7, model=0): 7, ProcessCoord(pipe=0, data=8, model=0): 8, ProcessCoord(pipe=0, data=9, model=0): 9, ProcessCoord(pipe=0, data=10, model=0): 10, ProcessCoord(pipe=0, data=11, model=0): 11, ProcessCoord(pipe=0, data=12, model=0): 12, ProcessCoord(pipe=0, data=13, model=0): 13, ProcessCoord(pipe=0, data=14, model=0): 14, ProcessCoord(pipe=0, data=15, model=0): 15, ProcessCoord(pipe=0, data=16, model=0): 16, 
ProcessCoord(pipe=0, data=17, model=0): 17, ProcessCoord(pipe=0, data=18, model=0): 18, ProcessCoord(pipe=0, data=19, model=0): 19, ProcessCoord(pipe=0, data=20, model=0): 20, ProcessCoord(pipe=0, data=21, model=0): 21, ProcessCoord(pipe=0, data=22, model=0): 22, ProcessCoord(pi +0: pe=0, data=23, model=0): 23, ProcessCoord(pipe=0, data=24, model=0): 24, ProcessCoord(pipe=0, data=25, model=0): 25, ProcessCoord(pipe=0, data=26, model=0): 26, ProcessCoord(pipe=0, data=27, model=0): 27, ProcessCoord(pipe=0, data=28, model=0): 28, ProcessCoord(pipe=0, data=29, model=0): 29, ProcessCoord(pipe=0, data=30, model=0): 30, ProcessCoord(pipe=0, data=31, model=0): 31, ProcessCoord(pipe=0, data=32, model=0): 32, ProcessCoord(pipe=0, data=33, model=0): 33, ProcessCoord(pipe=0, data=34, model=0): 34, ProcessCoord(pipe=0, data=35, model=0): 35, ProcessCoord(pipe=0, data=36, model=0): 36, ProcessCoord(pipe=0, data=37, model=0): 37, ProcessCoord(pipe=0, data=38, model=0): 38, ProcessCoord(pipe=0, data=39, model=0): 39, ProcessCoord(pipe=0, data=40, model=0): 40, ProcessCoord(pipe=0, data=41, model=0): 41, ProcessCoord(pipe=0, data=42, model=0): 42, ProcessCoord(pipe=0, data=43, model=0): 43, ProcessCoord(pipe=0, data=44, model=0): 44, ProcessCoord(pipe=0, data=45, model=0): 45, ProcessCoord(pipe=0, data=4 +0: 6, model=0): 46, ProcessCoord(pipe=0, data=47, model=0): 47, ProcessCoord(pipe=0, data=48, model=0): 48, ProcessCoord(pipe=0, data=49, model=0): 49, ProcessCoord(pipe=0, data=50, model=0): 50, ProcessCoord(pipe=0, data=51, model=0): 51, ProcessCoord(pipe=0, data=52, model=0): 52, ProcessCoord(pipe=0, data=53, model=0): 53, ProcessCoord(pipe=0, data=54, model=0): 54, ProcessCoord(pipe=0, data=55, model=0): 55, ProcessCoord(pipe=0, data=56, model=0): 56, ProcessCoord(pipe=0, data=57, model=0): 57, ProcessCoord(pipe=0, data=58, model=0): 58, ProcessCoord(pipe=0, data=59, model=0): 59, ProcessCoord(pipe=0, data=60, model=0): 60, ProcessCoord(pipe=0, data=61, model=0): 61, 
ProcessCoord(pipe=0, data=62, model=0): 62, ProcessCoord(pipe=0, data=63, model=0): 63} +0: [2023-02-10 00:17:21,132] [INFO] [module.py:366:_partition_layers] Partitioning pipeline stages with method type:transformer +0: stage=0 layers=25 +0: 0: _to_float16 +0: 1: EmbeddingPipe +0: 2: +0: 3: ParallelTransformerLayerPipe +0: 4: ParallelTransformerLayerPipe +0: 5: ParallelTransformerLayerPipe +0: 6: ParallelTransformerLayerPipe +0: 7: ParallelTransformerLayerPipe +0: 8: ParallelTransformerLayerPipe +0: 9: ParallelTransformerLayerPipe +0: 10: ParallelTransformerLayerPipe +0: 11: ParallelTransformerLayerPipe +0: 12: ParallelTransformerLayerPipe +0: 13: ParallelTransformerLayerPipe +0: 14: ParallelTransformerLayerPipe +0: 15: ParallelTransformerLayerPipe +0: 16: ParallelTransformerLayerPipe +0: 17: ParallelTransformerLayerPipe +0: 18: ParallelTransformerLayerPipe +0: 19: ParallelTransformerLayerPipe +0: 20: ParallelTransformerLayerPipe +0: 21: undo +0: 22: MixedFusedLayerNorm +0: 23: EmbeddingPipe +0: 24: float16_to_fp32 +0: loss: CrossEntropy +0: [2023-02-10 00:17:21,640] [INFO] [utils.py:827:see_memory_usage] After Building Model +0: [2023-02-10 00:17:21,641] [INFO] [utils.py:828:see_memory_usage] MA 0.79 GB Max_MA 0.79 GB CA 0.86 GB Max_CA 1 GB +0: [2023-02-10 00:17:21,641] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.19 GB, percent = 6.0% +0: setting training iterations to 0 +0: > learning rate decay style: cosine +0: DeepSpeed is enabled. 
+0: [2023-02-10 00:17:21,643] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed info: version=0.7.5, git-hash=unknown, git-branch=unknown +0: [2023-02-10 00:17:34,750] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False +0: [2023-02-10 00:17:34,750] [INFO] [logging.py:68:log_dist] [Rank 0] Removing param_group that has no 'params' in the client Optimizer +0: [2023-02-10 00:17:34,750] [INFO] [logging.py:68:log_dist] [Rank 0] Using client Optimizer as basic optimizer +0: [2023-02-10 00:17:34,757] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Basic Optimizer = FusedAdam +0: [2023-02-10 00:17:34,757] [INFO] [logging.py:68:log_dist] [Rank 0] Creating BF16 optimizer +0: [2023-02-10 00:17:34,882] [INFO] [utils.py:827:see_memory_usage] begin bf16_optimizer +0: [2023-02-10 00:17:34,883] [INFO] [utils.py:828:see_memory_usage] MA 0.78 GB Max_MA 0.79 GB CA 0.88 GB Max_CA 1 GB +0: [2023-02-10 00:17:34,883] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.88 GB, percent = 6.1% +5: ninja: no work to do. 
+2: Time to load utils op: 0.34807729721069336 seconds +5: Time to load utils op: 0.36913514137268066 seconds +5: Time to load utils op: 0.368807315826416 seconds +3: Time to load utils op: 0.34785032272338867 seconds +3: Time to load utils op: 0.3478560447692871 seconds +2: Time to load utils op: 0.3480997085571289 seconds +5: Time to load utils op: 0.36962413787841797 seconds +3: Time to load utils op: 0.3478732109069824 seconds +2: Time to load utils op: 0.34812068939208984 seconds +5: Time to load utils op: 0.3699812889099121 seconds +5: Time to load utils op: 0.36900949478149414 seconds +3: Time to load utils op: 0.3479580879211426 secondsTime to load utils op: 0.34795498847961426 seconds +3: +3: Time to load utils op: 0.347963809967041 seconds +5: Time to load utils op: 0.3684515953063965 secondsTime to load utils op: 0.36954760551452637 seconds +5: +4: Time to load utils op: 0.34807491302490234 seconds +4: Time to load utils op: 0.3481099605560303 seconds +3: Time to load utils op: 0.3479599952697754 seconds +2: Time to load utils op: 0.34818363189697266 secondsTime to load utils op: 0.3481886386871338 seconds +2: +5: Time to load utils op: 0.36985135078430176 seconds +4: Time to load utils op: 0.3481719493865967 secondsTime to load utils op: 0.3481721878051758 secondsTime to load utils op: 0.3481717109680176 seconds +4: +4: +2: Time to load utils op: 0.3482084274291992 secondsTime to load utils op: 0.3481416702270508 seconds +2: Time to load utils op: 0.34821629524230957 seconds +4: Time to load utils op: 0.348186731338501 seconds +0: Time to load utils op: 0.29048657417297363 seconds +2: +4: Time to load utils op: 0.3482034206390381 seconds +3: Time to load utils op: 0.34798240661621094 seconds +0: Time to load utils op: 0.3649885654449463 seconds +6: Time to load utils op: 0.348452091217041 seconds +6: Time to load utils op: 0.3484632968902588 seconds +6: Time to load utils op: 0.34846997261047363 seconds +7: Time to load utils op: 0.34819579124450684 
seconds +4: Time to load utils op: 0.34820079803466797 seconds +1: Time to load utils op: 0.34830236434936523 seconds +1: Time to load utils op: 0.34833502769470215 secondsTime to load utils op: 0.3483450412750244 seconds +0: Time to load utils op: 0.37052154541015625 seconds +0: Time to load utils op: 0.37037134170532227 secondsTime to load utils op: 0.37051892280578613 seconds +6: Time to load utils op: 0.3484482765197754 seconds +6: Time to load utils op: 0.34848475456237793 seconds +7: Time to load utils op: 0.3482334613800049 seconds +7: Time to load utils op: 0.3482489585876465 seconds +1: +0: +6: Time to load utils op: 0.34852051734924316 seconds +7: Time to load utils op: 0.3482706546783447 seconds +1: Time to load utils op: 0.3483872413635254 seconds +0: Time to load utils op: 0.37003207206726074 secondsTime to load utils op: 0.37037110328674316 seconds +6: Time to load utils op: 0.34853219985961914 seconds +7: Time to load utils op: 0.3482804298400879 secondsTime to load utils op: 0.3482954502105713 seconds +1: Time to load utils op: 0.3483905792236328 seconds +0: +6: Time to load utils op: 0.34854960441589355 seconds +7: +1: Time to load utils op: 0.34841132164001465 secondsTime to load utils op: 0.348416805267334 seconds +1: +0: Time to load utils op: 0.3697624206542969 seconds +7: Time to load utils op: 0.3482930660247803 seconds +1: Time to load utils op: 0.3484313488006592 seconds +7: Time to load utils op: 0.34832024574279785 seconds +0: [2023-02-10 00:17:35,283] [INFO] [utils.py:827:see_memory_usage] before initializing group 0 +0: [2023-02-10 00:17:35,284] [INFO] [utils.py:828:see_memory_usage] MA 0.78 GB Max_MA 0.78 GB CA 0.88 GB Max_CA 1 GB +0: [2023-02-10 00:17:35,284] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 30.88 GB, percent = 6.1% +1: Time to load utils op: 0.0009801387786865234 seconds +3: Time to load utils op: 0.0008318424224853516 seconds +0: Time to load utils op: 0.0008862018585205078 seconds +0: Time to load 
utils op: 0.0008754730224609375 secondsTime to load utils op: 0.00080108642578125 seconds +0: +3: Time to load utils op: 0.0007572174072265625 seconds +0: Time to load utils op: 0.0008897781372070312 seconds +3: Time to load utils op: 0.0007338523864746094 seconds +0: Time to load utils op: 0.0009975433349609375 secondsTime to load utils op: 0.0008974075317382812 seconds +0: +0: Time to load utils op: 0.0009100437164306641 seconds +2: Time to load utils op: 0.0006532669067382812 seconds +3: Time to load utils op: 0.0008523464202880859 seconds +7: Time to load utils op: 0.0009455680847167969 seconds +1: Time to load utils op: 0.0015254020690917969 seconds +3: Time to load utils op: 0.0007817745208740234 seconds +3: Time to load utils op: 0.0007884502410888672 seconds +1: Time to load utils op: 0.0014939308166503906 seconds +3: Time to load utils op: 0.0008525848388671875 seconds +1: Time to load utils op: 0.001592874526977539 seconds +4: Time to load utils op: 0.0014688968658447266 seconds +3: Time to load utils op: 0.0009777545928955078 seconds +1: Time to load utils op: 0.0016055107116699219 seconds +1: Time to load utils op: 0.0015988349914550781 seconds +1: Time to load utils op: 0.0015370845794677734 secondsTime to load utils op: 0.0015456676483154297 seconds +1: +7: Time to load utils op: 0.0012640953063964844 seconds +5: Time to load utils op: 0.001384735107421875 seconds +6: Time to load utils op: 0.0012984275817871094 seconds +2: Time to load utils op: 0.0012912750244140625 seconds +7: Time to load utils op: 0.0014455318450927734 seconds +7: Time to load utils op: 0.0014491081237792969 seconds +4: Time to load utils op: 0.0018305778503417969 seconds +7: Time to load utils op: 0.001580953598022461 seconds +5: Time to load utils op: 0.0017299652099609375 seconds +7: Time to load utils op: 0.0015752315521240234 seconds +2: Time to load utils op: 0.0012726783752441406 seconds +2: Time to load utils op: 0.0012309551239013672 seconds +4: Time to load utils op: 
0.001817941665649414 seconds +2: Time to load utils op: 0.001241445541381836 seconds +4: Time to load utils op: 0.0018868446350097656 seconds +7: Time to load utils op: 0.0015025138854980469 seconds +2: Time to load utils op: 0.001253366470336914 seconds +4: Time to load utils op: 0.0018639564514160156 seconds +2: Time to load utils op: 0.0012867450714111328 seconds +2: Time to load utils op: 0.0012676715850830078 seconds +4: Time to load utils op: 0.001871347427368164 seconds +4: Time to load utils op: 0.0018463134765625 seconds +6: Time to load utils op: 0.0014300346374511719 seconds +4: Time to load utils op: 0.0018801689147949219 seconds +6: Time to load utils op: 0.00157928466796875 seconds +7: Time to load utils op: 0.00037598609924316406 seconds +6: Time to load utils op: 0.001508951187133789 seconds +6: Time to load utils op: 0.0016531944274902344 secondsTime to load utils op: 0.0015842914581298828 secondsTime to load utils op: 0.0016148090362548828 seconds +6: +6: +6: Time to load utils op: 0.0015933513641357422 seconds +5: Time to load utils op: 0.0018079280853271484 seconds +5: Time to load utils op: 0.0018546581268310547 seconds +5: Time to load utils op: 0.0017652511596679688 seconds +5: Time to load utils op: 0.0018877983093261719 secondsTime to load utils op: 0.0018775463104248047 seconds +5: +5: Time to load utils op: 0.0019297599792480469 seconds +0: [2023-02-10 00:17:35,667] [INFO] [utils.py:827:see_memory_usage] after initializing group 0 +0: [2023-02-10 00:17:35,668] [INFO] [utils.py:828:see_memory_usage] MA 1.68 GB Max_MA 1.68 GB CA 2.21 GB Max_CA 2 GB +0: [2023-02-10 00:17:35,668] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.03 GB, percent = 6.2% +0: [2023-02-10 00:17:35,781] [INFO] [utils.py:827:see_memory_usage] before initializing group 1 +0: [2023-02-10 00:17:35,782] [INFO] [utils.py:828:see_memory_usage] MA 1.68 GB Max_MA 1.68 GB CA 2.21 GB Max_CA 2 GB +0: [2023-02-10 00:17:35,782] [INFO] 
[utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.03 GB, percent = 6.2% +0: [2023-02-10 00:17:35,891] [INFO] [utils.py:827:see_memory_usage] after initializing group 1 +0: [2023-02-10 00:17:35,892] [INFO] [utils.py:828:see_memory_usage] MA 2.38 GB Max_MA 2.38 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-10 00:17:35,892] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.03 GB, percent = 6.2% +0: [2023-02-10 00:17:35,997] [INFO] [utils.py:827:see_memory_usage] before initializing group 2 +0: [2023-02-10 00:17:35,997] [INFO] [utils.py:828:see_memory_usage] MA 2.38 GB Max_MA 2.38 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-10 00:17:35,997] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.03 GB, percent = 6.2% +0: [2023-02-10 00:17:36,105] [INFO] [utils.py:827:see_memory_usage] after initializing group 2 +0: [2023-02-10 00:17:36,106] [INFO] [utils.py:828:see_memory_usage] MA 2.38 GB Max_MA 2.38 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-10 00:17:36,106] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.03 GB, percent = 6.2% +0: [2023-02-10 00:17:36,210] [INFO] [utils.py:827:see_memory_usage] before initialize_optimizer +0: [2023-02-10 00:17:36,210] [INFO] [utils.py:828:see_memory_usage] MA 2.38 GB Max_MA 2.38 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-10 00:17:36,210] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.03 GB, percent = 6.2% +0: [2023-02-10 00:17:36,321] [INFO] [utils.py:827:see_memory_usage] end initialize_optimizer +0: [2023-02-10 00:17:36,321] [INFO] [utils.py:828:see_memory_usage] MA 2.43 GB Max_MA 2.43 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-10 00:17:36,321] [INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.03 GB, percent = 6.2% +0: [2023-02-10 00:17:36,425] [INFO] [utils.py:827:see_memory_usage] end bf16_optimizer +0: [2023-02-10 00:17:36,426] [INFO] [utils.py:828:see_memory_usage] MA 2.43 GB Max_MA 2.43 GB CA 3.23 GB Max_CA 3 GB +0: [2023-02-10 00:17:36,426] 
[INFO] [utils.py:836:see_memory_usage] CPU Virtual Memory: used = 31.03 GB, percent = 6.2% +0: [2023-02-10 00:17:36,426] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Final Optimizer = FusedAdam +0: [2023-02-10 00:17:36,426] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed using client LR scheduler +0: [2023-02-10 00:17:36,426] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed LR Scheduler = +0: [2023-02-10 00:17:36,426] [INFO] [logging.py:68:log_dist] [Rank 0] step=0, skipped=0, lr=[0.0002, 0.0002, 0.0002], mom=[(0.9, 0.999), (0.9, 0.999), (0.9, 0.999)] +0: [2023-02-10 00:17:36,426] [INFO] [config.py:1007:print] DeepSpeedEngine configuration: +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] activation_checkpointing_config { +0: "partition_activations": false, +0: "contiguous_memory_optimization": false, +0: "cpu_checkpointing": false, +0: "number_checkpoints": null, +0: "synchronize_checkpoint_boundary": false, +0: "profile": false +0: } +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] aio_config ................... {'block_size': 1048576, 'queue_depth': 8, 'thread_count': 1, 'single_submit': False, 'overlap_events': True} +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] amp_enabled .................. False +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] amp_params ................... False +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] autotuning_config ............ 
{ +0: "enabled": false, +0: "start_step": null, +0: "end_step": null, +0: "metric_path": null, +0: "arg_mappings": null, +0: "metric": "throughput", +0: "model_info": null, +0: "results_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_results", +0: "exps_dir": "/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/autotuning_exps", +0: "overwrite": true, +0: "fast": true, +0: "start_profile_step": 3, +0: "end_profile_step": 5, +0: "tuner_type": "gridsearch", +0: "tuner_early_stopping": 5, +0: "tuner_num_trials": 50, +0: "model_info_path": null, +0: "mp_size": 1, +0: "max_train_batch_size": null, +0: "min_train_batch_size": 1, +0: "max_train_micro_batch_size_per_gpu": 1.024000e+03, +0: "min_train_micro_batch_size_per_gpu": 1, +0: "num_tuning_micro_batch_sizes": 3 +0: } +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] bfloat16_enabled ............. True +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] checkpoint_parallel_write_pipeline False +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] checkpoint_tag_validation_enabled True +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] checkpoint_tag_validation_fail False +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] comms_config ................. +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] communication_data_type ...... None +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] compression_config ........... 
{'weight_quantization': {'shared_parameters': {'enabled': False, 'quantizer_kernel': False, 'schedule_offset': 0, 'quantize_groups': 1, 'quantize_verbose': False, 'quantization_type': 'symmetric', 'quantize_weight_in_forward': False, 'rounding': 'nearest', 'fp16_mixed_quantize': False, 'quantize_change_ratio': 0.001}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {'enabled': False, 'quantization_type': 'symmetric', 'range_calibration': 'dynamic', 'schedule_offset': 1000}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {'enabled': False, 'method': 'topk', 'schedule_offset': 1000}, 'different_groups': {}}, 'channel_pruning': {'shared_pa +0: rameters': {'enabled': False, 'method': 'l1', 'schedule_offset': 1000}, 'different_groups': {}}, 'layer_reduction': {'enabled': False}} +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] curriculum_enabled ........... False +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] curriculum_params ............ False +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] dataloader_drop_last ......... False +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] disable_allgather ............ False +0: [2023-02-10 00:17:36,427] [INFO] [config.py:1011:print] dump_state ................... False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] dynamic_loss_scale_args ...... None +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] eigenvalue_enabled ........... False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] eigenvalue_gas_boundary_resolution 1 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] eigenvalue_layer_name ........ 
bert.encoder.layer +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] eigenvalue_layer_num ......... 0 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] eigenvalue_max_iter .......... 100 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] eigenvalue_stability ......... 1e-06 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] eigenvalue_tol ............... 0.01 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] eigenvalue_verbose ........... False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] elasticity_enabled ........... False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] flops_profiler_config ........ { +0: "enabled": false, +0: "profile_step": 1, +0: "module_depth": -1, +0: "top_modules": 1, +0: "detailed": true, +0: "output_file": null +0: } +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] fp16_auto_cast ............... None +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] fp16_enabled ................. False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] fp16_master_weights_and_gradients False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] global_rank .................. 0 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] gradient_accumulation_steps .. 1 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] gradient_clipping ............ 1.0 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] gradient_predivide_factor .... 1.0 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] initial_dynamic_scale ........ 1 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] load_universal_checkpoint .... False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] loss_scale ................... 1.0 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] memory_breakdown ............. False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] monitor_config ............... 
+0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] nebula_config ................ { +0: "enabled": false, +0: "persistent_storage_path": null, +0: "persistent_time_interval": 100, +0: "num_of_version_in_retention": 2, +0: "enable_nebula_load": true, +0: "load_path": null +0: } +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] optimizer_legacy_fusion ...... False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] optimizer_name ............... None +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] optimizer_params ............. None +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] pipeline ..................... {'stages': 'auto', 'partition': 'best', 'seed_layers': False, 'activation_checkpoint_interval': 0} +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] pld_enabled .................. False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] pld_params ................... False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] prescale_gradients ........... False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] scheduler_name ............... None +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] scheduler_params ............. None +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] sparse_attention ............. None +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] sparse_gradients_enabled ..... False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] steps_per_print .............. 2000 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] train_batch_size ............. 256 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] train_micro_batch_size_per_gpu 4 +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] use_node_local_storage ....... False +0: [2023-02-10 00:17:36,428] [INFO] [config.py:1011:print] wall_clock_breakdown ......... 
False +0: [2023-02-10 00:17:36,429] [INFO] [config.py:1011:print] world_size ................... 64 +0: [2023-02-10 00:17:36,429] [INFO] [config.py:1011:print] zero_allow_untested_optimizer False +0: [2023-02-10 00:17:36,429] [INFO] [config.py:1011:print] zero_config .................. stage=0 contiguous_gradients=True reduce_scatter=True reduce_bucket_size=500000000 allgather_partitions=True allgather_bucket_size=500000000 overlap_comm=False load_from_fp32_weights=True elastic_checkpoint=False offload_param=None offload_optimizer=None sub_group_size=1000000000 cpu_offload_param=None cpu_offload_use_pin_memory=None cpu_offload=None prefetch_bucket_size=50000000 param_persistence_threshold=100000 model_persistence_threshold=9223372036854775807 max_live_parameters=1000000000 max_reuse_distance=1000000000 gather_16bit_weights_on_model_save=False stage3_gather_fp16_weights_on_model_save=False ignore_unused_parameters=True legacy_stage1=False round_robin_gradients=False +0: [2023-02-10 00:17:36,429] [INFO] [config.py:1011:print] zero_enabled ................. False +0: [2023-02-10 00:17:36,429] [INFO] [config.py:1011:print] zero_optimization_stage ...... 
0 +0: [2023-02-10 00:17:36,429] [INFO] [config.py:996:print_user_config] json = { +0: "train_micro_batch_size_per_gpu": 4, +0: "train_batch_size": 256, +0: "gradient_clipping": 1.0, +0: "zero_optimization": { +0: "stage": 0 +0: }, +0: "bf16": { +0: "enabled": true +0: }, +0: "steps_per_print": 2.000000e+03, +0: "wall_clock_breakdown": false +0: } +0: Time to load utils op: 0.0004191398620605469 seconds +0: [2023-02-10 00:17:36,429] [INFO] [engine.py:87:__init__] CONFIG: micro_batches=1 micro_batch_size=4 +0: [2023-02-10 00:17:36,441] [INFO] [engine.py:145:__init__] RANK=0 STAGE=0 LAYERS=25 [0, 25) STAGE_PARAMS=421207040 (421.207M) TOTAL_PARAMS=421207040 (421.207M) UNIQUE_PARAMS=421207040 (421.207M) +0: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... 
+4: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... 
+0: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... 
+1: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... 
+3: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... 
+2: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,468] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... 
+2: [2023-02-10 00:17:36,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,469] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+0: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... 
+3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +5: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... 
+2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +4: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... 
+1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +6: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... 
+4: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +3: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt... +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... 
+0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+4: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+6: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+3: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+6: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... 
+7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. 
+3: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +4: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +2: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... 
+4: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +3: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +6: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/mp_rank_00_model_states.pt. +1: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... 
+7: [2023-02-10 00:17:36,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... 
+1: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,800] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. 
+5: [2023-02-10 00:17:36,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,801] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,806] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,807] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,808] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... 
+1: [2023-02-10 00:17:36,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,809] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,810] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-10 00:17:36,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-10 00:17:36,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... 
+1: [2023-02-10 00:17:36,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-10 00:17:36,818] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-10 00:17:36,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-10 00:17:36,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-10 00:17:36,819] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-10 00:17:36,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. 
+6: [2023-02-10 00:17:36,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,840] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,841] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... 
+0: [2023-02-10 00:17:36,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,844] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,845] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +6: [2023-02-10 00:17:36,846] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-10 00:17:36,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... 
+0: [2023-02-10 00:17:36,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-10 00:17:36,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-10 00:17:36,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-10 00:17:36,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +0: [2023-02-10 00:17:36,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,850] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. 
+2: [2023-02-10 00:17:36,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-10 00:17:36,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-10 00:17:36,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-10 00:17:36,858] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-10 00:17:36,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-10 00:17:36,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-10 00:17:36,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-10 00:17:36,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. 
+5: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +5: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. 
+7: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,860] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,861] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,864] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... 
+7: [2023-02-10 00:17:36,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +2: [2023-02-10 00:17:36,866] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+4: [2023-02-10 00:17:36,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,867] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:36,868] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-10 00:17:36,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-10 00:17:36,868] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-10 00:17:36,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-10 00:17:36,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. 
+3: [2023-02-10 00:17:36,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-10 00:17:36,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:36,869] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +4: [2023-02-10 00:17:36,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +7: [2023-02-10 00:17:36,870] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +1: [2023-02-10 00:17:36,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. 
+1: [2023-02-10 00:17:36,870] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,871] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:36,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,871] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,872] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:36,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:36,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,873] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... 
+3: [2023-02-10 00:17:36,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +3: [2023-02-10 00:17:36,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt... +5: [2023-02-10 00:17:36,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-10 00:17:36,877] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-10 00:17:36,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-10 00:17:36,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-10 00:17:36,879] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-10 00:17:36,885] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,886] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+1: [2023-02-10 00:17:36,887] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:36,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,887] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,888] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,888] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:36,889] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +1: [2023-02-10 00:17:36,889] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:36,891] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:36,893] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. 
+6: [2023-02-10 00:17:36,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,898] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,900] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-10 00:17:36,901] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:36,904] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:36,905] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:36,906] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-10 00:17:36,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. 
+0: [2023-02-10 00:17:36,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,906] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,907] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,908] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-10 00:17:36,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-10 00:17:36,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-10 00:17:36,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+2: [2023-02-10 00:17:36,915] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-10 00:17:36,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-10 00:17:36,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-10 00:17:36,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:36,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:36,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:36,917] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +6: [2023-02-10 00:17:36,917] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+2: [2023-02-10 00:17:36,918] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:36,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:36,921] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:36,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,922] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +7: [2023-02-10 00:17:36,923] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:36,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+0: [2023-02-10 00:17:36,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-10 00:17:36,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:36,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-10 00:17:36,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-10 00:17:36,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +0: [2023-02-10 00:17:36,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-10 00:17:36,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:36,929] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-10 00:17:36,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:36,930] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +3: [2023-02-10 00:17:36,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+0: [2023-02-10 00:17:36,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:36,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:36,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:36,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +4: [2023-02-10 00:17:36,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_01-model_00-model_states.pt. +2: [2023-02-10 00:17:36,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:36,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+2: [2023-02-10 00:17:36,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:36,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-10 00:17:36,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:36,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:36,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:36,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:36,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:36,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:36,938] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:36,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+7: [2023-02-10 00:17:36,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:36,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:36,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:36,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:36,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:36,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:36,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:36,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:36,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:36,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+3: [2023-02-10 00:17:36,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:36,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-10 00:17:37,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-10 00:17:37,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-10 00:17:37,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-10 00:17:37,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-10 00:17:37,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-10 00:17:37,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. 
+1: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. 
+6: [2023-02-10 00:17:37,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:37,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:37,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:37,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:37,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,018] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+6: [2023-02-10 00:17:37,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,023] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-10 00:17:37,027] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. 
+3: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,029] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. 
+3: [2023-02-10 00:17:37,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:37,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:37,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:37,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:37,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +3: [2023-02-10 00:17:37,030] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:37,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+2: [2023-02-10 00:17:37,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +2: [2023-02-10 00:17:37,031] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:37,031] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. 
+2: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. 
+0: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+0: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,032] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. 
+4: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,034] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-10 00:17:37,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-10 00:17:37,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-10 00:17:37,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-10 00:17:37,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +0: [2023-02-10 00:17:37,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... 
+0: [2023-02-10 00:17:37,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:37,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-10 00:17:37,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-10 00:17:37,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:37,034] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-10 00:17:37,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +5: [2023-02-10 00:17:37,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:37,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:37,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +7: [2023-02-10 00:17:37,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +6: [2023-02-10 00:17:37,035] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. 
+4: [2023-02-10 00:17:37,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:37,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:37,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,036] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:37,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:37,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +4: [2023-02-10 00:17:37,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt... +1: [2023-02-10 00:17:37,036] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,037] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-10 00:17:37,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. 
+1: [2023-02-10 00:17:37,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-10 00:17:37,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-10 00:17:37,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-10 00:17:37,039] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... 
+6: [2023-02-10 00:17:37,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +6: [2023-02-10 00:17:37,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-10 00:17:37,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-10 00:17:37,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-10 00:17:37,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... 
+6: [2023-02-10 00:17:37,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +2: [2023-02-10 00:17:37,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. 
+2: [2023-02-10 00:17:37,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-10 00:17:37,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-10 00:17:37,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-10 00:17:37,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-10 00:17:37,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-10 00:17:37,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... 
+0: [2023-02-10 00:17:37,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +1: [2023-02-10 00:17:37,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-10 00:17:37,051] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,052] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-10 00:17:37,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-10 00:17:37,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +7: [2023-02-10 00:17:37,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. 
+7: [2023-02-10 00:17:37,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-10 00:17:37,054] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-10 00:17:37,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +5: [2023-02-10 00:17:37,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,056] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... 
+3: [2023-02-10 00:17:37,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +3: [2023-02-10 00:17:37,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-10 00:17:37,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... 
+2: [2023-02-10 00:17:37,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. 
+0: [2023-02-10 00:17:37,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +0: [2023-02-10 00:17:37,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_03-model_00-model_states.pt. +4: [2023-02-10 00:17:37,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-10 00:17:37,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-10 00:17:37,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... 
+7: [2023-02-10 00:17:37,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... 
+4: [2023-02-10 00:17:37,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,075] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,221] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+6: [2023-02-10 00:17:37,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,222] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,222] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,223] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,223] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-10 00:17:37,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+1: [2023-02-10 00:17:37,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,224] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-10 00:17:37,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,224] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+3: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,225] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+5: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+0: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,226] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... 
+3: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +3: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+1: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +1: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,227] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+2: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +5: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... 
+7: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +0: [2023-02-10 00:17:37,228] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+4: [2023-02-10 00:17:37,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,229] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... 
+2: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +7: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +2: [2023-02-10 00:17:37,230] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,231] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,231] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,232] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... 
+4: [2023-02-10 00:17:37,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +4: [2023-02-10 00:17:37,233] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt... +6: [2023-02-10 00:17:37,235] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,237] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,238] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,240] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+0: [2023-02-10 00:17:37,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,242] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-10 00:17:37,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,242] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-10 00:17:37,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,243] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-10 00:17:37,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-10 00:17:37,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+7: [2023-02-10 00:17:37,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,244] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-10 00:17:37,244] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,245] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,245] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-10 00:17:37,246] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+6: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+1: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,247] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-10 00:17:37,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-10 00:17:37,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +2: [2023-02-10 00:17:37,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,248] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,248] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+0: [2023-02-10 00:17:37,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-10 00:17:37,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-10 00:17:37,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-10 00:17:37,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +7: [2023-02-10 00:17:37,249] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-10 00:17:37,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,249] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-10 00:17:37,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... 
+5: [2023-02-10 00:17:37,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-10 00:17:37,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +5: [2023-02-10 00:17:37,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,250] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +3: [2023-02-10 00:17:37,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,250] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... 
+6: [2023-02-10 00:17:37,251] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,252] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +6: [2023-02-10 00:17:37,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,253] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-10 00:17:37,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-10 00:17:37,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-10 00:17:37,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +0: [2023-02-10 00:17:37,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. 
+0: [2023-02-10 00:17:37,253] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +4: [2023-02-10 00:17:37,254] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_04-model_00-model_states.pt. +1: [2023-02-10 00:17:37,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-10 00:17:37,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-10 00:17:37,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-10 00:17:37,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-10 00:17:37,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... 
+2: [2023-02-10 00:17:37,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,255] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... 
+3: [2023-02-10 00:17:37,256] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,257] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... 
+4: [2023-02-10 00:17:37,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,260] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-10 00:17:37,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-10 00:17:37,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-10 00:17:37,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-10 00:17:37,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-10 00:17:37,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,261] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... 
+4: [2023-02-10 00:17:37,262] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,370] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... 
+5: [2023-02-10 00:17:37,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. 
+5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,373] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... 
+0: [2023-02-10 00:17:37,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-10 00:17:37,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. 
+3: [2023-02-10 00:17:37,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. 
+0: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +3: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +0: [2023-02-10 00:17:37,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... 
+0: [2023-02-10 00:17:37,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. 
+1: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +1: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... 
+1: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... 
+4: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. 
+4: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +4: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +7: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... 
+7: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +2: [2023-02-10 00:17:37,379] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +6: [2023-02-10 00:17:37,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,380] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt... +5: [2023-02-10 00:17:37,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. 
+6: [2023-02-10 00:17:37,381] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. 
+3: [2023-02-10 00:17:37,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-10 00:17:37,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-10 00:17:37,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+5: [2023-02-10 00:17:37,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. 
+2: [2023-02-10 00:17:37,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. 
+4: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +7: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,392] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+3: [2023-02-10 00:17:37,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-10 00:17:37,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,393] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+1: [2023-02-10 00:17:37,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +6: [2023-02-10 00:17:37,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-10 00:17:37,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +0: [2023-02-10 00:17:37,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+7: [2023-02-10 00:17:37,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +2: [2023-02-10 00:17:37,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +5: [2023-02-10 00:17:37,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-10 00:17:37,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-10 00:17:37,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-10 00:17:37,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. 
+4: [2023-02-10 00:17:37,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +3: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+3: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +4: [2023-02-10 00:17:37,400] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_05-model_00-model_states.pt. +1: [2023-02-10 00:17:37,400] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-10 00:17:37,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+2: [2023-02-10 00:17:37,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-10 00:17:37,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,405] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+0: [2023-02-10 00:17:37,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-10 00:17:37,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-10 00:17:37,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. 
+1: [2023-02-10 00:17:37,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,584] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,584] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-10 00:17:37,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+1: [2023-02-10 00:17:37,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-10 00:17:37,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-10 00:17:37,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-10 00:17:37,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,585] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +3: [2023-02-10 00:17:37,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+4: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+3: [2023-02-10 00:17:37,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. 
+0: [2023-02-10 00:17:37,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,596] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,596] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +1: [2023-02-10 00:17:37,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. 
+3: [2023-02-10 00:17:37,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,597] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. 
+1: [2023-02-10 00:17:37,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,602] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,603] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +1: [2023-02-10 00:17:37,604] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+1: [2023-02-10 00:17:37,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,611] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. 
+7: [2023-02-10 00:17:37,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. 
+7: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+6: [2023-02-10 00:17:37,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +3: [2023-02-10 00:17:37,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +7: [2023-02-10 00:17:37,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. 
+5: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +6: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+5: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-10 00:17:37,618] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +5: [2023-02-10 00:17:37,619] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-10 00:17:37,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,621] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-10 00:17:37,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +0: [2023-02-10 00:17:37,623] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. 
+7: [2023-02-10 00:17:37,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,625] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +0: [2023-02-10 00:17:37,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,626] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. 
+4: [2023-02-10 00:17:37,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +4: [2023-02-10 00:17:37,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. 
+4: [2023-02-10 00:17:37,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,620] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. 
+5: [2023-02-10 00:17:37,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-10 00:17:37,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-10 00:17:37,628] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,631] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+0: [2023-02-10 00:17:37,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,632] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,633] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +7: [2023-02-10 00:17:37,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,633] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,634] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. 
+5: [2023-02-10 00:17:37,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-10 00:17:37,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,635] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,635] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,637] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,638] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,639] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,641] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,642] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-10 00:17:37,642] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+5: [2023-02-10 00:17:37,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,643] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +6: [2023-02-10 00:17:37,644] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +5: [2023-02-10 00:17:37,645] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,647] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+2: [2023-02-10 00:17:37,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-10 00:17:37,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-10 00:17:37,655] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,655] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-10 00:17:37,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,657] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-10 00:17:37,658] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... 
+2: [2023-02-10 00:17:37,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-10 00:17:37,659] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt... +2: [2023-02-10 00:17:37,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,664] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,665] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,670] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,671] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +4: [2023-02-10 00:17:37,627] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,672] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+2: [2023-02-10 00:17:37,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,679] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,680] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,681] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_06-model_00-model_states.pt. +2: [2023-02-10 00:17:37,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,687] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,688] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. 
+0: [2023-02-10 00:17:37,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+0: [2023-02-10 00:17:37,744] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,753] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,759] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-10 00:17:37,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-10 00:17:37,760] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-10 00:17:37,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+6: [2023-02-10 00:17:37,760] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-10 00:17:37,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-10 00:17:37,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,761] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-10 00:17:37,761] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. 
+6: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+0: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,762] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+3: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +3: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+4: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +4: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,763] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,764] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+1: [2023-02-10 00:17:37,764] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. 
+2: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,765] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. 
+7: [2023-02-10 00:17:37,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-10 00:17:37,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,766] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-10 00:17:37,766] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+7: [2023-02-10 00:17:37,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +1: [2023-02-10 00:17:37,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +2: [2023-02-10 00:17:37,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-10 00:17:37,767] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +7: [2023-02-10 00:17:37,767] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +0: [2023-02-10 00:17:37,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... 
+2: [2023-02-10 00:17:37,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,768] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,768] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt... +6: [2023-02-10 00:17:37,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,769] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-10 00:17:37,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-10 00:17:37,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:37,770] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,770] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-10 00:17:37,771] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. 
+5: [2023-02-10 00:17:37,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,772] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,772] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,774] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +0: [2023-02-10 00:17:37,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,774] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... 
+2: [2023-02-10 00:17:37,775] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,775] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:37,776] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-10 00:17:37,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-10 00:17:37,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-10 00:17:37,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-10 00:17:37,776] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. 
+1: [2023-02-10 00:17:37,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,777] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,777] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-10 00:17:37,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-10 00:17:37,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +6: [2023-02-10 00:17:37,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. 
+6: [2023-02-10 00:17:37,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,778] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,779] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,778] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. 
+7: [2023-02-10 00:17:37,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,779] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,780] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +3: [2023-02-10 00:17:37,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,780] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. 
+7: [2023-02-10 00:17:37,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,781] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,781] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-10 00:17:37,782] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +7: [2023-02-10 00:17:37,782] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:37,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-10 00:17:37,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:37,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... 
+2: [2023-02-10 00:17:37,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:37,783] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,783] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:37,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +1: [2023-02-10 00:17:37,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:37,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,784] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,784] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,785] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. 
+2: [2023-02-10 00:17:37,785] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,786] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +5: [2023-02-10 00:17:37,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-10 00:17:37,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-10 00:17:37,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... 
+5: [2023-02-10 00:17:37,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-10 00:17:37,786] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-10 00:17:37,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,787] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +2: [2023-02-10 00:17:37,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... 
+7: [2023-02-10 00:17:37,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-10 00:17:37,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:37,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:37,790] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_07-model_00-model_states.pt. +4: [2023-02-10 00:17:37,790] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,791] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:37,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,792] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:37,795] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... 
+0: [2023-02-10 00:17:37,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... 
+0: [2023-02-10 00:17:37,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,967] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,969] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,979] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:37,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:37,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. 
+0: [2023-02-10 00:17:37,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:37,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:37,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:37,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:37,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:37,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. 
+3: [2023-02-10 00:17:37,983] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:37,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:37,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:37,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. 
+3: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+1: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. 
+1: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:37,993] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:37,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:37,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +1: [2023-02-10 00:17:37,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +0: [2023-02-10 00:17:37,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-10 00:17:37,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:37,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:37,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... 
+1: [2023-02-10 00:17:37,994] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:37,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:37,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:37,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,995] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-10 00:17:37,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. 
+6: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... 
+5: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:37,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:37,996] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:37,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:37,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:37,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:37,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:37,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:37,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-10 00:17:37,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... 
+5: [2023-02-10 00:17:37,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-10 00:17:37,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:37,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:37,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +3: [2023-02-10 00:17:37,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:37,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:38,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:38,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,000] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:38,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:38,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,001] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:38,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,001] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:38,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:38,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +0: [2023-02-10 00:17:38,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:38,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:38,003] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:38,004] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... 
+2: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +5: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. 
+1: [2023-02-10 00:17:38,005] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:38,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-10 00:17:38,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:38,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,006] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-10 00:17:38,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:38,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,007] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +2: [2023-02-10 00:17:38,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. 
+1: [2023-02-10 00:17:38,007] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:38,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,008] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +7: [2023-02-10 00:17:38,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:38,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:38,008] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +4: [2023-02-10 00:17:38,009] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-10 00:17:38,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,009] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-10 00:17:38,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:38,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:38,010] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:38,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:38,010] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,011] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt... +6: [2023-02-10 00:17:38,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-10 00:17:38,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. 
+6: [2023-02-10 00:17:38,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-10 00:17:38,012] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:38,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-10 00:17:38,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-10 00:17:38,012] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:38,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:38,013] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:38,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:38,013] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-10 00:17:38,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:38,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +5: [2023-02-10 00:17:38,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,014] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-10 00:17:38,014] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,015] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:38,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-10 00:17:38,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,016] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +7: [2023-02-10 00:17:38,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,016] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +1: [2023-02-10 00:17:38,017] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+2: [2023-02-10 00:17:38,017] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,018] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +3: [2023-02-10 00:17:38,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,019] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +4: [2023-02-10 00:17:38,020] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +6: [2023-02-10 00:17:38,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,020] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,021] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,022] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+6: [2023-02-10 00:17:38,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,023] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,025] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,026] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,027] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,028] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,030] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,033] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. 
+2: [2023-02-10 00:17:38,035] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,038] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_08-model_00-model_states.pt. +2: [2023-02-10 00:17:38,039] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,171] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,171] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,172] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. 
+1: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. 
+6: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,173] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +1: [2023-02-10 00:17:38,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+2: [2023-02-10 00:17:38,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,174] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,174] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. 
+6: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +6: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,175] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +2: [2023-02-10 00:17:38,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. 
+2: [2023-02-10 00:17:38,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-10 00:17:38,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,176] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,176] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,177] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,177] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. 
+3: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,178] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +5: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-10 00:17:38,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:38,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-10 00:17:38,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:38,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +7: [2023-02-10 00:17:38,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+3: [2023-02-10 00:17:38,180] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-10 00:17:38,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:38,180] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +4: [2023-02-10 00:17:38,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-10 00:17:38,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +3: [2023-02-10 00:17:38,181] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-10 00:17:38,181] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-10 00:17:38,183] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... +0: [2023-02-10 00:17:38,183] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt... 
+6: [2023-02-10 00:17:38,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,183] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,184] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. 
+1: [2023-02-10 00:17:38,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,186] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,187] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,188] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. 
+6: [2023-02-10 00:17:38,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,189] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,189] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,190] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,190] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-10 00:17:38,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. 
+6: [2023-02-10 00:17:38,191] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,191] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-10 00:17:38,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,192] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,192] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. 
+2: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. 
+4: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,193] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,194] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,194] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,194] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,194] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-10 00:17:38,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... 
+2: [2023-02-10 00:17:38,195] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-10 00:17:38,196] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +2: [2023-02-10 00:17:38,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,196] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,197] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,197] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. 
+0: [2023-02-10 00:17:38,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +6: [2023-02-10 00:17:38,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,198] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,198] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,199] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,199] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-10 00:17:38,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-10 00:17:38,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,200] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,200] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. 
+3: [2023-02-10 00:17:38,201] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-10 00:17:38,201] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,202] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +1: [2023-02-10 00:17:38,202] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-10 00:17:38,203] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +5: [2023-02-10 00:17:38,204] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +3: [2023-02-10 00:17:38,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,204] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... 
+0: [2023-02-10 00:17:38,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,205] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-10 00:17:38,206] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +7: [2023-02-10 00:17:38,206] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-10 00:17:38,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,207] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,209] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,210] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +0: [2023-02-10 00:17:38,211] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,212] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,212] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_09-model_00-model_states.pt. +4: [2023-02-10 00:17:38,216] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,217] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,218] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,385] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-10 00:17:38,387] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+0: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+6: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,388] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +4: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... 
+6: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... 
+6: [2023-02-10 00:17:38,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-10 00:17:38,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-10 00:17:38,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,390] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,390] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,391] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,392] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,393] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,394] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +0: [2023-02-10 00:17:38,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+5: [2023-02-10 00:17:38,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,395] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+4: [2023-02-10 00:17:38,395] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+7: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-10 00:17:38,396] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-10 00:17:38,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-10 00:17:38,397] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,397] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+2: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +2: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +6: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +7: [2023-02-10 00:17:38,398] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+2: [2023-02-10 00:17:38,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +3: [2023-02-10 00:17:38,399] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt... +1: [2023-02-10 00:17:38,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,399] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,401] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+5: [2023-02-10 00:17:38,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,402] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,403] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-10 00:17:38,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,404] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-10 00:17:38,404] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+6: [2023-02-10 00:17:38,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-10 00:17:38,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-10 00:17:38,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-10 00:17:38,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,405] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-10 00:17:38,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,406] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-10 00:17:38,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-10 00:17:38,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,406] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+0: [2023-02-10 00:17:38,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,407] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-10 00:17:38,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,408] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,408] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,409] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,409] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+3: [2023-02-10 00:17:38,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +5: [2023-02-10 00:17:38,410] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,410] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +4: [2023-02-10 00:17:38,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-10 00:17:38,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-10 00:17:38,411] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,411] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +1: [2023-02-10 00:17:38,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,412] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-10 00:17:38,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-10 00:17:38,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,413] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. 
+7: [2023-02-10 00:17:38,413] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,414] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,414] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +6: [2023-02-10 00:17:38,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-10 00:17:38,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,415] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +2: [2023-02-10 00:17:38,415] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... 
+6: [2023-02-10 00:17:38,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,416] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,417] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,418] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,419] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +3: [2023-02-10 00:17:38,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,419] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,420] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... 
+3: [2023-02-10 00:17:38,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,421] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,422] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +0: [2023-02-10 00:17:38,423] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_10-model_00-model_states.pt. +7: [2023-02-10 00:17:38,423] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,424] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,428] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,429] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+5: [2023-02-10 00:17:38,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+5: [2023-02-10 00:17:38,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +5: [2023-02-10 00:17:38,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+5: [2023-02-10 00:17:38,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +5: [2023-02-10 00:17:38,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-10 00:17:38,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,570] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... 
+6: [2023-02-10 00:17:38,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,571] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,571] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,572] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,572] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,572] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+6: [2023-02-10 00:17:38,573] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,574] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,575] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,580] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,582] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,582] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+6: [2023-02-10 00:17:38,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,585] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,586] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... 
+6: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+2: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,587] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-10 00:17:38,588] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-10 00:17:38,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+4: [2023-02-10 00:17:38,588] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,588] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,588] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+1: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... 
+1: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,589] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+1: [2023-02-10 00:17:38,590] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-10 00:17:38,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +2: [2023-02-10 00:17:38,590] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-10 00:17:38,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-10 00:17:38,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-10 00:17:38,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-10 00:17:38,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-10 00:17:38,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +1: [2023-02-10 00:17:38,591] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,591] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +4: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,592] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +7: [2023-02-10 00:17:38,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,593] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,593] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +3: [2023-02-10 00:17:38,594] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+4: [2023-02-10 00:17:38,594] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +0: [2023-02-10 00:17:38,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-10 00:17:38,595] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,595] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt... +6: [2023-02-10 00:17:38,596] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-10 00:17:38,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-10 00:17:38,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-10 00:17:38,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-10 00:17:38,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-10 00:17:38,598] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+7: [2023-02-10 00:17:38,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,600] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,600] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,601] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-10 00:17:38,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-10 00:17:38,601] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+0: [2023-02-10 00:17:38,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,602] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,603] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +6: [2023-02-10 00:17:38,604] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-10 00:17:38,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+7: [2023-02-10 00:17:38,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +2: [2023-02-10 00:17:38,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,605] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,605] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-10 00:17:38,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,606] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,606] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+1: [2023-02-10 00:17:38,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,607] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,607] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +3: [2023-02-10 00:17:38,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-10 00:17:38,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,608] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-10 00:17:38,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. 
+4: [2023-02-10 00:17:38,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,608] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,609] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,609] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-10 00:17:38,610] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... 
+2: [2023-02-10 00:17:38,611] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,612] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-10 00:17:38,613] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +7: [2023-02-10 00:17:38,613] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... 
+3: [2023-02-10 00:17:38,614] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,615] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +1: [2023-02-10 00:17:38,615] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-10 00:17:38,616] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,616] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +0: [2023-02-10 00:17:38,617] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,617] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,618] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,619] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,621] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,622] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,624] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_11-model_00-model_states.pt. +4: [2023-02-10 00:17:38,630] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,649] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,650] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +5: [2023-02-10 00:17:38,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,659] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,662] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,665] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,666] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,667] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,668] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,669] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +5: [2023-02-10 00:17:38,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,673] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,674] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,675] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-10 00:17:38,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-10 00:17:38,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-10 00:17:38,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-10 00:17:38,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... 
+6: [2023-02-10 00:17:38,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-10 00:17:38,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-10 00:17:38,710] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... 
+3: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,711] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,713] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,713] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-10 00:17:38,712] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-10 00:17:38,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-10 00:17:38,712] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +3: [2023-02-10 00:17:38,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. 
+1: [2023-02-10 00:17:38,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-10 00:17:38,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-10 00:17:38,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-10 00:17:38,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-10 00:17:38,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. 
+2: [2023-02-10 00:17:38,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +4: [2023-02-10 00:17:38,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +1: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. 
+7: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. 
+0: [2023-02-10 00:17:38,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-10 00:17:38,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. 
+0: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +2: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-10 00:17:38,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +7: [2023-02-10 00:17:38,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. 
+7: [2023-02-10 00:17:38,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +0: [2023-02-10 00:17:38,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt... +6: [2023-02-10 00:17:38,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,724] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,724] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. 
+3: [2023-02-10 00:17:38,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. 
+2: [2023-02-10 00:17:38,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-10 00:17:38,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-10 00:17:38,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-10 00:17:38,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. 
+1: [2023-02-10 00:17:38,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-10 00:17:38,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-10 00:17:38,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,730] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,731] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+1: [2023-02-10 00:17:38,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-10 00:17:38,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. 
+7: [2023-02-10 00:17:38,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +3: [2023-02-10 00:17:38,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-10 00:17:38,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-10 00:17:38,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +6: [2023-02-10 00:17:38,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-10 00:17:38,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-10 00:17:38,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-10 00:17:38,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-10 00:17:38,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +2: [2023-02-10 00:17:38,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-10 00:17:38,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +1: [2023-02-10 00:17:38,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+2: [2023-02-10 00:17:38,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,740] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+1: [2023-02-10 00:17:38,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-10 00:17:38,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +7: [2023-02-10 00:17:38,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +4: [2023-02-10 00:17:38,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_12-model_00-model_states.pt. +0: [2023-02-10 00:17:38,748] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+0: [2023-02-10 00:17:38,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,750] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,751] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,788] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,788] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,789] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,789] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +5: [2023-02-10 00:17:38,797] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. 
+5: [2023-02-10 00:17:38,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,798] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,804] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,805] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,807] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,811] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,812] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +5: [2023-02-10 00:17:38,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,813] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,813] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,814] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,814] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,815] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,815] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,816] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,821] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. 
+6: [2023-02-10 00:17:38,825] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,825] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-10 00:17:38,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,826] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-10 00:17:38,826] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-10 00:17:38,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,827] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-10 00:17:38,827] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-10 00:17:38,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,828] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,828] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,829] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. 
+1: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+1: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,830] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. 
+2: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,831] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,832] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-10 00:17:38,832] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. 
+2: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +2: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. 
+7: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +3: [2023-02-10 00:17:38,833] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. 
+7: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +1: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,834] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... 
+0: [2023-02-10 00:17:38,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,835] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,835] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +7: [2023-02-10 00:17:38,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,836] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +4: [2023-02-10 00:17:38,836] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,837] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,838] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +0: [2023-02-10 00:17:38,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt... +6: [2023-02-10 00:17:38,839] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,839] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,840] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,841] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,842] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. 
+1: [2023-02-10 00:17:38,842] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +4: [2023-02-10 00:17:38,843] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-10 00:17:38,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,843] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. 
+7: [2023-02-10 00:17:38,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,844] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,845] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,847] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,848] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +1: [2023-02-10 00:17:38,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,848] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,849] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+1: [2023-02-10 00:17:38,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-10 00:17:38,850] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-10 00:17:38,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,851] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-10 00:17:38,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-10 00:17:38,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-10 00:17:38,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,852] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+1: [2023-02-10 00:17:38,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-10 00:17:38,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-10 00:17:38,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,852] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,853] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,853] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,854] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. 
+3: [2023-02-10 00:17:38,854] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +6: [2023-02-10 00:17:38,855] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-10 00:17:38,855] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +3: [2023-02-10 00:17:38,856] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +7: [2023-02-10 00:17:38,857] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,858] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,859] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +2: [2023-02-10 00:17:38,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,860] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-10 00:17:38,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-10 00:17:38,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,861] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-10 00:17:38,862] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-10 00:17:38,865] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,869] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_13-model_00-model_states.pt. +0: [2023-02-10 00:17:38,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,875] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-10 00:17:38,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. 
+5: [2023-02-10 00:17:38,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-10 00:17:38,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-10 00:17:38,956] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-10 00:17:38,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-10 00:17:38,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-10 00:17:38,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-10 00:17:38,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,959] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+4: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+6: [2023-02-10 00:17:38,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,961] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,963] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+0: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+1: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,965] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+0: [2023-02-10 00:17:38,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,965] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,966] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-10 00:17:38,966] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,967] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. 
+0: [2023-02-10 00:17:38,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,968] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-10 00:17:38,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:38,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. 
+6: [2023-02-10 00:17:38,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-10 00:17:38,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-10 00:17:38,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:38,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+5: [2023-02-10 00:17:38,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:38,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-10 00:17:38,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +3: [2023-02-10 00:17:38,972] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,972] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. 
+4: [2023-02-10 00:17:38,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:38,973] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +4: [2023-02-10 00:17:38,973] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +5: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. 
+5: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +2: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +1: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-10 00:17:38,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +5: [2023-02-10 00:17:38,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-10 00:17:38,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-10 00:17:38,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-10 00:17:38,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-10 00:17:38,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... 
+7: [2023-02-10 00:17:38,976] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-10 00:17:38,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +7: [2023-02-10 00:17:38,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,977] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-10 00:17:38,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +0: [2023-02-10 00:17:38,977] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... 
+6: [2023-02-10 00:17:38,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-10 00:17:38,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt... +6: [2023-02-10 00:17:38,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,978] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,979] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:38,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-10 00:17:38,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. 
+5: [2023-02-10 00:17:38,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-10 00:17:38,980] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:38,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:38,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-10 00:17:38,981] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,982] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. 
+5: [2023-02-10 00:17:38,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-10 00:17:38,983] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-10 00:17:38,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:38,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,984] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +6: [2023-02-10 00:17:38,984] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-10 00:17:38,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:38,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:38,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. 
+3: [2023-02-10 00:17:38,985] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +2: [2023-02-10 00:17:38,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,986] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. 
+6: [2023-02-10 00:17:38,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-10 00:17:38,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-10 00:17:38,987] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,987] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-10 00:17:38,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:38,988] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +1: [2023-02-10 00:17:38,988] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... 
+1: [2023-02-10 00:17:38,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-10 00:17:38,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:38,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:38,989] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,989] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,990] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,990] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +4: [2023-02-10 00:17:38,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-10 00:17:38,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-10 00:17:38,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. 
+3: [2023-02-10 00:17:38,992] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-10 00:17:38,992] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +3: [2023-02-10 00:17:38,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:38,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:38,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:38,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:38,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:38,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:38,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:38,993] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... 
+0: [2023-02-10 00:17:38,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-10 00:17:38,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:38,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:38,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,994] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:38,995] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,997] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:38,997] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. 
+0: [2023-02-10 00:17:38,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-10 00:17:38,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:38,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +0: [2023-02-10 00:17:38,998] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-10 00:17:38,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:38,998] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_14-model_00-model_states.pt. +7: [2023-02-10 00:17:38,999] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:39,000] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:39,003] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:39,004] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:39,005] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... 
+5: [2023-02-10 00:17:39,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:39,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-10 00:17:39,129] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,129] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,130] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-10 00:17:39,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-10 00:17:39,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+4: [2023-02-10 00:17:39,131] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-10 00:17:39,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,131] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-10 00:17:39,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-10 00:17:39,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-10 00:17:39,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... 
+6: [2023-02-10 00:17:39,132] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-10 00:17:39,132] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-10 00:17:39,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-10 00:17:39,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-10 00:17:39,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +1: [2023-02-10 00:17:39,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-10 00:17:39,133] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+6: [2023-02-10 00:17:39,133] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-10 00:17:39,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-10 00:17:39,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,134] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:39,134] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+0: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... 
+3: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +4: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:39,135] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-10 00:17:39,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,136] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+7: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... 
+7: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,137] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +0: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+0: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +3: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+2: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:39,138] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,139] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,139] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +7: [2023-02-10 00:17:39,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +5: [2023-02-10 00:17:39,140] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-10 00:17:39,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:39,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +2: [2023-02-10 00:17:39,140] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... 
+0: [2023-02-10 00:17:39,141] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt... +6: [2023-02-10 00:17:39,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,141] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,142] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,143] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,144] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+5: [2023-02-10 00:17:39,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-10 00:17:39,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,145] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,145] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-10 00:17:39,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,146] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... 
+1: [2023-02-10 00:17:39,146] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-10 00:17:39,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-10 00:17:39,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,147] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-10 00:17:39,147] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+6: [2023-02-10 00:17:39,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +1: [2023-02-10 00:17:39,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,148] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,148] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,149] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+6: [2023-02-10 00:17:39,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,149] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,150] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,151] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-10 00:17:39,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-10 00:17:39,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+7: [2023-02-10 00:17:39,151] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-10 00:17:39,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,152] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,153] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,153] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-10 00:17:39,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... 
+4: [2023-02-10 00:17:39,154] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,154] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +5: [2023-02-10 00:17:39,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,155] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,155] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... 
+3: [2023-02-10 00:17:39,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-10 00:17:39,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-10 00:17:39,156] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,156] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-10 00:17:39,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... 
+2: [2023-02-10 00:17:39,157] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,158] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-10 00:17:39,159] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,159] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-10 00:17:39,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,160] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. 
+1: [2023-02-10 00:17:39,160] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-10 00:17:39,161] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +3: [2023-02-10 00:17:39,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,161] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +2: [2023-02-10 00:17:39,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +7: [2023-02-10 00:17:39,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +6: [2023-02-10 00:17:39,162] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +4: [2023-02-10 00:17:39,163] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... 
+0: [2023-02-10 00:17:39,164] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,165] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,166] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-10 00:17:39,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,167] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... 
+7: [2023-02-10 00:17:39,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,168] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,172] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_15-model_00-model_states.pt. +0: [2023-02-10 00:17:39,179] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,324] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,324] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,325] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. 
+0: [2023-02-10 00:17:39,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,325] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,326] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,326] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. 
+6: [2023-02-10 00:17:39,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,327] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,327] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-10 00:17:39,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,328] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. 
+6: [2023-02-10 00:17:39,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,328] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,329] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,329] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... 
+7: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,330] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... 
+7: [2023-02-10 00:17:39,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,331] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,331] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,332] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. 
+1: [2023-02-10 00:17:39,333] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,334] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,335] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,335] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +6: [2023-02-10 00:17:39,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,337] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,338] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,338] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-10 00:17:39,339] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. 
+1: [2023-02-10 00:17:39,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-10 00:17:39,341] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,343] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-10 00:17:39,343] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-10 00:17:39,344] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. 
+0: [2023-02-10 00:17:39,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-10 00:17:39,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-10 00:17:39,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-10 00:17:39,345] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-10 00:17:39,345] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-10 00:17:39,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-10 00:17:39,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,346] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,346] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,347] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... 
+1: [2023-02-10 00:17:39,347] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,348] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-10 00:17:39,348] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-10 00:17:39,350] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,350] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +6: [2023-02-10 00:17:39,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,351] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... 
+7: [2023-02-10 00:17:39,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-10 00:17:39,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-10 00:17:39,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,352] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-10 00:17:39,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,352] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,353] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... 
+2: [2023-02-10 00:17:39,353] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,354] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-10 00:17:39,354] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. 
+3: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... 
+3: [2023-02-10 00:17:39,355] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +3: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. 
+5: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +2: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +7: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... 
+4: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,356] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-10 00:17:39,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-10 00:17:39,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +5: [2023-02-10 00:17:39,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... 
+0: [2023-02-10 00:17:39,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-10 00:17:39,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,357] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,357] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-10 00:17:39,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +1: [2023-02-10 00:17:39,358] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +7: [2023-02-10 00:17:39,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,358] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... 
+2: [2023-02-10 00:17:39,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +4: [2023-02-10 00:17:39,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,359] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt... +0: [2023-02-10 00:17:39,359] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,360] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,363] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,363] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +1: [2023-02-10 00:17:39,364] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,365] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +0: [2023-02-10 00:17:39,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... 
+0: [2023-02-10 00:17:39,365] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,366] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,367] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,368] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. 
+4: [2023-02-10 00:17:39,369] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,371] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,371] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,372] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... 
+2: [2023-02-10 00:17:39,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,373] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,374] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,374] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,375] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. 
+3: [2023-02-10 00:17:39,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,376] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,377] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,378] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,378] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +4: [2023-02-10 00:17:39,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. 
+3: [2023-02-10 00:17:39,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,379] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,380] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,381] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +2: [2023-02-10 00:17:39,382] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +5: [2023-02-10 00:17:39,383] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,386] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_16-model_00-model_states.pt. +3: [2023-02-10 00:17:39,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,385] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... 
+3: [2023-02-10 00:17:39,386] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,387] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,389] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,391] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+6: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,527] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-10 00:17:39,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... 
+6: [2023-02-10 00:17:39,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +6: [2023-02-10 00:17:39,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,528] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,528] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... 
+3: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... 
+0: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-10 00:17:39,529] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+1: [2023-02-10 00:17:39,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-10 00:17:39,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,530] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,530] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +3: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... 
+6: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+5: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,531] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +5: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+5: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +1: [2023-02-10 00:17:39,532] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+2: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,533] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+7: [2023-02-10 00:17:39,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,534] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +2: [2023-02-10 00:17:39,534] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-10 00:17:39,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +0: [2023-02-10 00:17:39,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+7: [2023-02-10 00:17:39,535] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,535] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,537] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,538] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +7: [2023-02-10 00:17:39,538] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt... +4: [2023-02-10 00:17:39,539] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-10 00:17:39,540] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+1: [2023-02-10 00:17:39,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,541] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,542] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-10 00:17:39,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,542] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+6: [2023-02-10 00:17:39,543] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-10 00:17:39,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,543] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,544] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+4: [2023-02-10 00:17:39,545] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,544] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,545] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,546] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,546] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+2: [2023-02-10 00:17:39,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-10 00:17:39,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-10 00:17:39,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-10 00:17:39,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,547] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+4: [2023-02-10 00:17:39,547] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-10 00:17:39,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +3: [2023-02-10 00:17:39,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,548] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-10 00:17:39,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+5: [2023-02-10 00:17:39,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,549] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +2: [2023-02-10 00:17:39,549] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-10 00:17:39,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,550] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +6: [2023-02-10 00:17:39,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,550] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... 
+7: [2023-02-10 00:17:39,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,551] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,552] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... 
+6: [2023-02-10 00:17:39,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +1: [2023-02-10 00:17:39,553] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... 
+2: [2023-02-10 00:17:39,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,554] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,554] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,555] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +7: [2023-02-10 00:17:39,555] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,556] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +5: [2023-02-10 00:17:39,556] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... 
+5: [2023-02-10 00:17:39,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-10 00:17:39,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-10 00:17:39,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-10 00:17:39,557] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-10 00:17:39,558] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +0: [2023-02-10 00:17:39,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,558] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. 
+4: [2023-02-10 00:17:39,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,560] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,561] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,562] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_17-model_00-model_states.pt. +4: [2023-02-10 00:17:39,562] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-10 00:17:39,563] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,565] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,567] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... 
+5: [2023-02-10 00:17:39,708] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... 
+5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,709] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-10 00:17:39,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-10 00:17:39,710] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,714] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,714] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. 
+4: [2023-02-10 00:17:39,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-10 00:17:39,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,715] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,715] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... 
+2: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,716] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. 
+0: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. 
+2: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +2: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,717] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-10 00:17:39,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... 
+6: [2023-02-10 00:17:39,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-10 00:17:39,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +5: [2023-02-10 00:17:39,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,718] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,718] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-10 00:17:39,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-10 00:17:39,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. 
+7: [2023-02-10 00:17:39,719] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-10 00:17:39,719] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +6: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... 
+7: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +0: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. 
+7: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-10 00:17:39,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,720] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. 
+7: [2023-02-10 00:17:39,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +7: [2023-02-10 00:17:39,721] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,721] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... 
+1: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +4: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,722] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +1: [2023-02-10 00:17:39,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... +3: [2023-02-10 00:17:39,723] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt... 
+0: [2023-02-10 00:17:39,723] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,725] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,726] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-10 00:17:39,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,727] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,727] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... 
+6: [2023-02-10 00:17:39,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,728] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-10 00:17:39,728] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,729] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-10 00:17:39,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,729] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. 
+7: [2023-02-10 00:17:39,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,730] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-10 00:17:39,731] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-10 00:17:39,732] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-10 00:17:39,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... 
+5: [2023-02-10 00:17:39,732] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,733] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,733] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-10 00:17:39,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,734] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... 
+7: [2023-02-10 00:17:39,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-10 00:17:39,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,734] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-10 00:17:39,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,735] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. 
+2: [2023-02-10 00:17:39,735] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-10 00:17:39,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-10 00:17:39,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-10 00:17:39,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,736] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,736] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... 
+0: [2023-02-10 00:17:39,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-10 00:17:39,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +2: [2023-02-10 00:17:39,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-10 00:17:39,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-10 00:17:39,737] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,737] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +7: [2023-02-10 00:17:39,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +0: [2023-02-10 00:17:39,738] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. 
+4: [2023-02-10 00:17:39,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-10 00:17:39,738] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,739] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,739] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,740] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-10 00:17:39,741] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. 
+6: [2023-02-10 00:17:39,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,741] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-10 00:17:39,742] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,742] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... 
+1: [2023-02-10 00:17:39,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +6: [2023-02-10 00:17:39,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,743] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +5: [2023-02-10 00:17:39,743] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,744] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... 
+1: [2023-02-10 00:17:39,745] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,746] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +4: [2023-02-10 00:17:39,746] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-10 00:17:39,747] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,749] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +1: [2023-02-10 00:17:39,749] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,750] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_18-model_00-model_states.pt. +3: [2023-02-10 00:17:39,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... 
+4: [2023-02-10 00:17:39,752] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,755] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,756] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,911] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,911] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+0: [2023-02-10 00:17:39,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,912] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-10 00:17:39,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,912] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... 
+6: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+3: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,913] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-10 00:17:39,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-10 00:17:39,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-10 00:17:39,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,914] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+6: [2023-02-10 00:17:39,914] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-10 00:17:39,916] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-10 00:17:39,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,916] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,920] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-10 00:17:39,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-10 00:17:39,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+6: [2023-02-10 00:17:39,923] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-10 00:17:39,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,924] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... 
+1: [2023-02-10 00:17:39,924] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,925] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,925] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:39,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,926] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,926] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-10 00:17:39,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-10 00:17:39,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-10 00:17:39,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+3: [2023-02-10 00:17:39,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-10 00:17:39,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-10 00:17:39,927] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,927] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,928] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,928] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-10 00:17:39,929] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-10 00:17:39,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-10 00:17:39,930] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-10 00:17:39,931] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-10 00:17:39,931] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+4: [2023-02-10 00:17:39,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-10 00:17:39,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,932] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,932] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-10 00:17:39,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,933] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... 
+6: [2023-02-10 00:17:39,933] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... 
+3: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +3: [2023-02-10 00:17:39,934] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-10 00:17:39,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-10 00:17:39,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-10 00:17:39,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-10 00:17:39,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:39,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +3: [2023-02-10 00:17:39,935] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:39,935] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+6: [2023-02-10 00:17:39,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,936] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-10 00:17:39,937] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:39,938] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,939] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +6: [2023-02-10 00:17:39,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-10 00:17:39,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-10 00:17:39,940] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... 
+1: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... 
+2: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,941] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:39,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-10 00:17:39,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-10 00:17:39,942] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +6: [2023-02-10 00:17:39,942] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... 
+1: [2023-02-10 00:17:39,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:39,943] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-10 00:17:39,943] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +0: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +0: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+0: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +5: [2023-02-10 00:17:39,944] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-10 00:17:39,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +4: [2023-02-10 00:17:39,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:39,945] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-10 00:17:39,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:39,946] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+5: [2023-02-10 00:17:39,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,946] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:39,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,947] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,947] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:39,948] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:39,949] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:39,951] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +1: [2023-02-10 00:17:39,952] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-10 00:17:39,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+4: [2023-02-10 00:17:39,953] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:39,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-10 00:17:39,953] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,954] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:39,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,954] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +4: [2023-02-10 00:17:39,955] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+2: [2023-02-10 00:17:39,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,956] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-10 00:17:39,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-10 00:17:39,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-10 00:17:39,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-10 00:17:39,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,957] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-10 00:17:39,957] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+7: [2023-02-10 00:17:39,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +7: [2023-02-10 00:17:39,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-10 00:17:39,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +1: [2023-02-10 00:17:39,958] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:39,959] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,960] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-10 00:17:39,960] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... 
+5: [2023-02-10 00:17:39,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:39,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt... +2: [2023-02-10 00:17:39,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:39,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:39,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:39,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:39,961] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:39,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:39,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:39,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+5: [2023-02-10 00:17:39,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-10 00:17:39,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-10 00:17:39,962] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,962] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:39,963] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +2: [2023-02-10 00:17:39,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:39,964] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-10 00:17:39,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-10 00:17:39,968] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. 
+5: [2023-02-10 00:17:39,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:39,969] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:39,970] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-10 00:17:39,971] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +5: [2023-02-10 00:17:39,971] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-10 00:17:39,974] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-10 00:17:39,974] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,975] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,976] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... 
+7: [2023-02-10 00:17:39,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,978] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,980] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_19-model_00-model_states.pt. +7: [2023-02-10 00:17:39,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,981] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:39,985] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:40,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,041] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:40,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,041] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... 
+5: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+5: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:40,042] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+2: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-10 00:17:40,043] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+3: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+7: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:40,044] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+0: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:40,045] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+7: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +2: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+6: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +7: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... 
+4: [2023-02-10 00:17:40,046] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +6: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... 
+4: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:40,047] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +0: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +3: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... 
+4: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:40,048] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:40,049] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-10 00:17:40,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +1: [2023-02-10 00:17:40,049] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:40,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+0: [2023-02-10 00:17:40,050] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-10 00:17:40,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +4: [2023-02-10 00:17:40,050] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt... +5: [2023-02-10 00:17:40,051] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,052] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,053] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,054] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-10 00:17:40,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+7: [2023-02-10 00:17:40,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-10 00:17:40,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,055] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-10 00:17:40,056] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,057] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-10 00:17:40,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+6: [2023-02-10 00:17:40,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,057] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,058] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-10 00:17:40,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+7: [2023-02-10 00:17:40,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,058] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-10 00:17:40,059] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,060] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,060] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+0: [2023-02-10 00:17:40,061] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-10 00:17:40,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,061] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-10 00:17:40,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-10 00:17:40,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,062] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-10 00:17:40,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+0: [2023-02-10 00:17:40,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,062] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,063] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,063] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-10 00:17:40,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-10 00:17:40,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-10 00:17:40,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,064] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+1: [2023-02-10 00:17:40,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-10 00:17:40,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-10 00:17:40,064] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-10 00:17:40,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-10 00:17:40,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-10 00:17:40,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... 
+7: [2023-02-10 00:17:40,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-10 00:17:40,065] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,065] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-10 00:17:40,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,066] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +7: [2023-02-10 00:17:40,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,066] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-10 00:17:40,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-10 00:17:40,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... 
+0: [2023-02-10 00:17:40,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,067] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +3: [2023-02-10 00:17:40,067] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,068] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,069] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,069] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... 
+1: [2023-02-10 00:17:40,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,070] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,071] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +5: [2023-02-10 00:17:40,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-10 00:17:40,071] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +2: [2023-02-10 00:17:40,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,072] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... 
+4: [2023-02-10 00:17:40,073] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +4: [2023-02-10 00:17:40,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +6: [2023-02-10 00:17:40,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,074] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-10 00:17:40,074] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-10 00:17:40,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +0: [2023-02-10 00:17:40,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. +1: [2023-02-10 00:17:40,075] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_20-model_00-model_states.pt. 
+3: [2023-02-10 00:17:40,076] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-10 00:17:40,077] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... 
+5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. 
+3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... 
+5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. 
+2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. 
+2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... 
+2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. 
+7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. 
+2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... 
+2: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +3: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... 
+4: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. 
+4: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +7: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +5: [2023-02-10 00:17:40,078] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +6: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. 
+6: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +5: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. 
+1: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +3: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. 
+7: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. 
+2: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +7: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +2: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,079] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,080] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... 
+0: [2023-02-10 00:17:40,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +1: [2023-02-10 00:17:40,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +1: [2023-02-10 00:17:40,081] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,081] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: > overriding learning rate value to 0.0002 +0: > overriding minimum learning rate value to 2e-05 +0: > overriding warmup iterations value to 0 +0: > overriding total number of iterations value to 1 +0: > overriding decay style value to cosine +0: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. 
+0: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +4: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... +0: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt... 
+0: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +4: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +0: [2023-02-10 00:17:40,082] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/layer_22-model_00-model_states.pt. +6: [2023-02-10 00:17:40,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt... +6: [2023-02-10 00:17:40,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt... +6: [2023-02-10 00:17:40,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt... +6: [2023-02-10 00:17:40,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt... 
+6: [2023-02-10 00:17:40,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt... +6: [2023-02-10 00:17:40,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt... +6: [2023-02-10 00:17:40,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt... +6: [2023-02-10 00:17:40,099] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt... +3: [2023-02-10 00:17:40,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt... +3: [2023-02-10 00:17:40,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt... +3: [2023-02-10 00:17:40,100] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt... +3: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt... +3: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt... +3: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt... 
+3: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt... +3: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt... +5: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt... +5: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt... +5: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt... +5: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt... +5: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt... +5: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt... +5: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt... +5: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt... 
+4: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt... +4: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt... +4: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt... +1: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt... +1: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt... +2: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt... +2: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt... +2: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt... +4: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt... +4: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt... 
+4: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt... +4: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt... +4: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt... +0: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt... +1: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt... +1: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt... +0: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt... +7: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt... +2: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt... +1: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt... 
+1: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt... +1: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt... +1: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt... +0: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt... +2: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt... +2: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt... +2: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt... +0: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt... +0: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt... +2: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt... 
+0: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt... +0: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt... +7: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt... +7: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt... +0: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt... +7: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt... +7: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt... +7: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt... +7: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt... +7: [2023-02-10 00:17:40,101] [INFO] [torch_checkpoint_engine.py:21:load] [Torch] Loading checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt... 
+0: [2023-02-10 00:17:40,342] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt. +0: [2023-02-10 00:17:40,342] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 1 +0: [2023-02-10 00:17:40,345] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 1 +6: [2023-02-10 00:17:40,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt. +6: [2023-02-10 00:17:40,364] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt. +6: [2023-02-10 00:17:40,364] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 54 +6: [2023-02-10 00:17:40,364] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 52 +6: [2023-02-10 00:17:40,367] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 54 +6: [2023-02-10 00:17:40,367] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 52 +1: [2023-02-10 00:17:40,369] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt. +1: [2023-02-10 00:17:40,369] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 9 +2: [2023-02-10 00:17:40,370] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt. 
+2: [2023-02-10 00:17:40,370] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 20 +2: [2023-02-10 00:17:40,373] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 20 +1: [2023-02-10 00:17:40,373] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 9 +1: [2023-02-10 00:17:40,376] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt. +1: [2023-02-10 00:17:40,377] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 12 +5: [2023-02-10 00:17:40,377] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt. +5: [2023-02-10 00:17:40,377] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 41 +1: [2023-02-10 00:17:40,379] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 12 +5: [2023-02-10 00:17:40,380] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 41 +3: [2023-02-10 00:17:40,384] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt. +3: [2023-02-10 00:17:40,384] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 26 +3: [2023-02-10 00:17:40,387] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 26 +1: [2023-02-10 00:17:40,394] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt. 
+1: [2023-02-10 00:17:40,394] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 13 +0: [2023-02-10 00:17:40,396] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt. +0: [2023-02-10 00:17:40,396] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 7 +1: [2023-02-10 00:17:40,397] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 13 +0: [2023-02-10 00:17:40,399] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 7 +2: [2023-02-10 00:17:40,401] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt. +2: [2023-02-10 00:17:40,401] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 19 +5: [2023-02-10 00:17:40,402] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt. +5: [2023-02-10 00:17:40,402] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 44 +3: [2023-02-10 00:17:40,403] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt. 
+3: [2023-02-10 00:17:40,403] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 27 +2: [2023-02-10 00:17:40,404] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 19 +5: [2023-02-10 00:17:40,405] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 44 +3: [2023-02-10 00:17:40,407] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 27 +0: [2023-02-10 00:17:40,412] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt. +0: [2023-02-10 00:17:40,412] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 3 +0: [2023-02-10 00:17:40,415] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 3 +5: [2023-02-10 00:17:40,417] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt. +5: [2023-02-10 00:17:40,418] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 46 +5: [2023-02-10 00:17:40,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt. +6: [2023-02-10 00:17:40,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt. 
+6: [2023-02-10 00:17:40,418] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 53 +5: [2023-02-10 00:17:40,418] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt. +5: [2023-02-10 00:17:40,418] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 43 +5: [2023-02-10 00:17:40,418] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 40 +6: [2023-02-10 00:17:40,421] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 53 +5: [2023-02-10 00:17:40,421] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt. +5: [2023-02-10 00:17:40,422] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 42 +5: [2023-02-10 00:17:40,422] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 43 +5: [2023-02-10 00:17:40,422] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 46 +3: [2023-02-10 00:17:40,422] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt. 
+5: [2023-02-10 00:17:40,422] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 40 +3: [2023-02-10 00:17:40,422] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 24 +5: [2023-02-10 00:17:40,425] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 42 +3: [2023-02-10 00:17:40,425] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 24 +6: [2023-02-10 00:17:40,426] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt. +6: [2023-02-10 00:17:40,427] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 48 +6: [2023-02-10 00:17:40,430] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 48 +7: [2023-02-10 00:17:40,434] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt. +7: [2023-02-10 00:17:40,434] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 61 +7: [2023-02-10 00:17:40,437] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 61 +7: [2023-02-10 00:17:40,440] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt. +7: [2023-02-10 00:17:40,440] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 57 +1: [2023-02-10 00:17:40,442] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt. 
+1: [2023-02-10 00:17:40,442] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 10 +7: [2023-02-10 00:17:40,443] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 57 +1: [2023-02-10 00:17:40,445] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 10 +2: [2023-02-10 00:17:40,447] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt. +2: [2023-02-10 00:17:40,447] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 22 +3: [2023-02-10 00:17:40,448] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt. +3: [2023-02-10 00:17:40,449] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 29 +2: [2023-02-10 00:17:40,450] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 22 +2: [2023-02-10 00:17:40,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt. +2: [2023-02-10 00:17:40,451] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 18 +3: [2023-02-10 00:17:40,451] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt. 
+3: [2023-02-10 00:17:40,451] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 31 +3: [2023-02-10 00:17:40,452] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 29 +3: [2023-02-10 00:17:40,454] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 31 +2: [2023-02-10 00:17:40,454] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 18 +0: [2023-02-10 00:17:40,455] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt. +0: [2023-02-10 00:17:40,455] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 5 +0: [2023-02-10 00:17:40,458] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 5 +6: [2023-02-10 00:17:40,461] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt. +6: [2023-02-10 00:17:40,461] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 55 +4: [2023-02-10 00:17:40,462] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt. +4: [2023-02-10 00:17:40,462] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 39 +2: [2023-02-10 00:17:40,464] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt. 
+2: [2023-02-10 00:17:40,464] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 23 +6: [2023-02-10 00:17:40,464] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 55 +4: [2023-02-10 00:17:40,465] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 39 +2: [2023-02-10 00:17:40,466] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 23 +6: [2023-02-10 00:17:40,466] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt. +6: [2023-02-10 00:17:40,467] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 51 +6: [2023-02-10 00:17:40,471] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 51 +4: [2023-02-10 00:17:40,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt. +5: [2023-02-10 00:17:40,475] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt. 
+4: [2023-02-10 00:17:40,475] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 38 +5: [2023-02-10 00:17:40,475] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 47 +5: [2023-02-10 00:17:40,479] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 47 +4: [2023-02-10 00:17:40,478] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 38 +0: [2023-02-10 00:17:40,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt. +4: [2023-02-10 00:17:40,480] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt. +4: [2023-02-10 00:17:40,480] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 35 +0: [2023-02-10 00:17:40,480] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 6 +3: [2023-02-10 00:17:40,481] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt. +3: [2023-02-10 00:17:40,482] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 28 +4: [2023-02-10 00:17:40,483] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 35 +0: [2023-02-10 00:17:40,483] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 6 +0: [2023-02-10 00:17:40,484] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt. 
+0: [2023-02-10 00:17:40,485] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 4 +3: [2023-02-10 00:17:40,485] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 28 +0: [2023-02-10 00:17:40,488] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 4 +2: [2023-02-10 00:17:40,495] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt. +2: [2023-02-10 00:17:40,495] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 21 +5: [2023-02-10 00:17:40,497] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt. +5: [2023-02-10 00:17:40,497] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 45 +2: [2023-02-10 00:17:40,498] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 21 +6: [2023-02-10 00:17:40,498] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt. +6: [2023-02-10 00:17:40,498] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 49 +5: [2023-02-10 00:17:40,500] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 45 +6: [2023-02-10 00:17:40,502] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 49 +1: [2023-02-10 00:17:40,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt. 
+1: [2023-02-10 00:17:40,505] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 8 +3: [2023-02-10 00:17:40,505] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt. +3: [2023-02-10 00:17:40,506] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 25 +6: [2023-02-10 00:17:40,508] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt. +6: [2023-02-10 00:17:40,508] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 50 +1: [2023-02-10 00:17:40,508] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 8 +3: [2023-02-10 00:17:40,508] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 25 +0: [2023-02-10 00:17:40,511] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt. +6: [2023-02-10 00:17:40,511] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 50 +0: [2023-02-10 00:17:40,512] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 2 +7: [2023-02-10 00:17:40,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt. 
+7: [2023-02-10 00:17:40,512] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 63 +1: [2023-02-10 00:17:40,512] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt. +1: [2023-02-10 00:17:40,513] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 15 +2: [2023-02-10 00:17:40,513] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt. +2: [2023-02-10 00:17:40,513] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 17 +7: [2023-02-10 00:17:40,515] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 63 +0: [2023-02-10 00:17:40,516] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 2 +1: [2023-02-10 00:17:40,515] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 15 +2: [2023-02-10 00:17:40,516] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 17 +7: [2023-02-10 00:17:40,541] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt. +7: [2023-02-10 00:17:40,541] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 62 +7: [2023-02-10 00:17:40,544] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 62 +4: [2023-02-10 00:17:40,553] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt. 
+4: [2023-02-10 00:17:40,554] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 36 +4: [2023-02-10 00:17:40,556] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 36 +2: [2023-02-10 00:17:40,557] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt. +2: [2023-02-10 00:17:40,557] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 16 +4: [2023-02-10 00:17:40,559] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt. +4: [2023-02-10 00:17:40,559] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 37 +2: [2023-02-10 00:17:40,560] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 16 +4: [2023-02-10 00:17:40,562] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 37 +3: [2023-02-10 00:17:40,568] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt. +3: [2023-02-10 00:17:40,568] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 30 +7: [2023-02-10 00:17:40,570] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt. 
+7: [2023-02-10 00:17:40,570] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 60 +3: [2023-02-10 00:17:40,571] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 30 +7: [2023-02-10 00:17:40,573] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 60 +7: [2023-02-10 00:17:40,581] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt. +7: [2023-02-10 00:17:40,581] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 58 +7: [2023-02-10 00:17:40,584] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 58 +1: [2023-02-10 00:17:40,599] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt. +1: [2023-02-10 00:17:40,600] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 11 +1: [2023-02-10 00:17:40,605] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 11 +4: [2023-02-10 00:17:40,624] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt. +4: [2023-02-10 00:17:40,625] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 34 +7: [2023-02-10 00:17:40,627] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt. 
+7: [2023-02-10 00:17:40,627] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 56 +4: [2023-02-10 00:17:40,629] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 34 +4: [2023-02-10 00:17:40,630] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt. +7: [2023-02-10 00:17:40,630] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 56 +4: [2023-02-10 00:17:40,630] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 33 +7: [2023-02-10 00:17:40,632] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt. +7: [2023-02-10 00:17:40,632] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 59 +4: [2023-02-10 00:17:40,634] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 33 +7: [2023-02-10 00:17:40,635] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 59 +1: [2023-02-10 00:17:40,692] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt. +1: [2023-02-10 00:17:40,692] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 14 +1: [2023-02-10 00:17:40,695] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 14 +0: [2023-02-10 00:17:43,958] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt. 
+0: [2023-02-10 00:17:43,958] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 0 +0: [2023-02-10 00:17:43,963] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 0 +0: could not find arguments in the checkpoint ... +0: checkpoint version 3.0 +4: [2023-02-10 00:17:43,970] [INFO] [torch_checkpoint_engine.py:23:load] [Torch] Loaded checkpoint from checkpoints_421m3b93b9/global_step7508/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt. +4: [2023-02-10 00:17:43,970] [INFO] [engine.py:2844:_get_all_zero_checkpoint_state_dicts] successfully read 64 ZeRO state_dicts for rank 32 +4: [2023-02-10 00:17:43,974] [INFO] [engine.py:2784:_load_zero_checkpoint] loading 64 zero partition checkpoints for rank 32 +0: successfully loaded checkpoint from checkpoints_421m3b93b9 at iteration 0 +7: time (ms) | load-checkpoint: 7544.26 +0: estimated model parameters: 0.42120704 +0: estimated model parameters without embeddings: 0.35419648 +0: [after model, optimizer, and learning rate scheduler are built] datetime: 2023-02-10 00:17:44 +0: > building train, validation, and test datasets ... +0: > datasets target sizes (minimum size): +0: train: 1 +0: validation: 25600 +0: test: 25600 +0: > building train, validation, and test datasets for GPT ... +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... 
+0: > finished creating indexed dataset in 1.348967 seconds +0: number of documents: 8148327 +0: > dataset split: +0: train: +0: document indices in [0, 8148327) total of 8148327 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_3B9_text_document_train_indexmap_1ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_3B9_text_document_train_indexmap_1ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_3B9_text_document_train_indexmap_1ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.087 seconds +0: total number of samples: 1903063 +0: total number of epochs: 1 +0: > building dataset index ... +0: reading sizes... +0: reading pointers... +0: reading document index... +0: creating numpy buffer of mmap... +0: creating memory view of numpy buffer... +0: > finished creating indexed dataset in 0.040296 seconds +0: number of documents: 364608 +0: > dataset split: +0: validation: +0: document indices in [0, 364608) total of 364608 documents +0: > loading doc-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_doc_idx.npy +0: > loading sample-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_sample_idx.npy +0: > loading shuffle-idx mapping from /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document_validation_indexmap_25600ns_2048sl_1234s_shuffle_idx.npy +0: loaded indexed file in 0.074 seconds +0: total number of samples: 84978 +0: total number of epochs: 1 +0: > finished creating GPT datasets ... +0: [after dataloaders are built] datetime: 2023-02-10 00:17:59 +0: done with setup ... +0: training ... 
+7: time (ms) | model-and-optimizer-setup: 25442.03 | train/valid/test-data-iterators-setup: 15045.02 +0: [after training is done] datetime: 2023-02-10 00:17:59 +7: ----------------------------------------------------------------------------------------------------------------- +7: validation loss at the end of training for val data | lm loss value: 3.348126E+00 | lm loss PPL: 2.844938E+01 | +7: ----------------------------------------------------------------------------------------------------------------- +END 2821215: Fri 10 Feb 2023 12:18:29 AM EET diff --git a/421m3b93b9/sbatch_421m3b93b9.sh b/421m3b93b9/sbatch_421m3b93b9.sh new file mode 100755 index 0000000000000000000000000000000000000000..db3a775d689d1407282e7afa0669960116264dde --- /dev/null +++ b/421m3b93b9/sbatch_421m3b93b9.sh @@ -0,0 +1,163 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=421m3b93b9 + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" +TRAIN_DATA_PATH=train3b9.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_3B9_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=1 
+TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_425M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 3936562000 +# -> Samples: 1_922_149 +TRAIN_SAMPLES=1_922_149 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 19_221 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1000 \ + --eval-iters 1 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + --log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat < $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + 
"zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/421m3b93b9/sbatch_421m3b93b9val.sh b/421m3b93b9/sbatch_421m3b93b9val.sh new file mode 100644 index 0000000000000000000000000000000000000000..17daca1c5334816f16dc716f24e9338066aa30bb --- /dev/null +++ b/421m3b93b9/sbatch_421m3b93b9val.sh @@ -0,0 +1,167 @@ +#!/bin/bash +#SBATCH --nodes=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=32 +#SBATCH --mem=256G +#SBATCH -p standard-g +#SBATCH -t 48:00:00 +#SBATCH --gpus-per-node=mi250:8 +#SBATCH --exclusive=user +#SBATCH --hint=nomultithread +#SBATCH --account=project_462000119 +#SBATCH -o logs/%j.out +#SBATCH -e logs/%j.err + +VARIANT=421m3b93b9val +VARIANT_CKPT=421m3b93b9 + +# if run without sbatch, invoke here +if [ -z $SLURM_JOB_ID ]; then + mkdir -p logs + sbatch "$0" + exit +fi + +set -euo pipefail + +# symlink logs/latest.out and logs/latest.err +ln -f -s $SLURM_JOB_ID.out logs/latest.out +ln -f -s $SLURM_JOB_ID.err logs/latest.err + +KILL_SWITCH_PATH=kill-switch-$VARIANT +CHECKPOINT_PATH=checkpoints_$VARIANT_CKPT +TENSORBOARD_PATH=tensorboard_$VARIANT + +# Data +VOCAB_FILE="gpt2/vocab.json" +MERGE_FILE="gpt2/merges.txt" +#DATA_PATH="/scratch/project_462000119/data/pile/megatron_data/meg-gpt2_pile_text_document" 
+TRAIN_DATA_PATH=train3b9.txt +# "train: 1.0 0:1 /scratch/project_462000119/data/c4_subsampled/gpt2tok_c4_en_3B9_text_document" +VALID_DATA_PATH=val.txt +# "validation: 1.0 0:1 /scratch/project_462000119/data/c4_validation/gpt2tok_c4validation_rerun_text_document" + + +PP_SIZE=1 +TP_SIZE=1 + +MICRO_BATCH_SIZE=4 +GRADIENT_ACCUMULATION_STEPS=1 +WORLD_SIZE=$((SLURM_GPUS_ON_NODE*SLURM_JOB_NUM_NODES)) +GLOBAL_BATCH_SIZE=$((MICRO_BATCH_SIZE*WORLD_SIZE*GRADIENT_ACCUMULATION_STEPS)) + +# Model parameters +source model_params.sh +MODEL_PARAM=("${PARAM_425M[@]}") +NHIDDEN=${MODEL_PARAM[0]} +FFN_HIDDEN_SIZE=${MODEL_PARAM[1]} +KV_SIZE=${MODEL_PARAM[2]} +NHEADS=${MODEL_PARAM[3]} +NLAYERS=${MODEL_PARAM[4]} +SEQ_LEN=2048 + +echo "Model parameters: d_model $NHIDDEN ffw_size $FFN_HIDDEN_SIZE kv_size $KV_SIZE n_heads $NHEADS n_layers $NLAYERS" + +SAVE_INTERVAL=1000 + +# Tokens: 3936562000 +# -> Samples: 1_922_149 +TRAIN_SAMPLES=1 + +OPTIMIZER_ARGS=" \ + --optimizer adam \ + --adam-beta1 0.9 \ + --adam-beta2 0.999 \ + --adam-eps 1e-8 \ + --lr 2e-4 \ + --min-lr 2e-5 \ + --lr-decay-style cosine \ + --lr-decay-samples $TRAIN_SAMPLES \ + --lr-warmup-samples 0 \ + --clip-grad 1.0 \ + --weight-decay 1e-1 \ + --no-load-optim \ + --reset-progress \ + --override-lr-scheduler \ + " + +GPT_ARGS=" \ + --num-layers $NLAYERS \ + --hidden-size $NHIDDEN \ + --num-attention-heads $NHEADS \ + --kv-channels $KV_SIZE \ + --ffn-hidden-size $FFN_HIDDEN_SIZE \ + --seq-length $SEQ_LEN \ + --max-position-embeddings $SEQ_LEN \ + --micro-batch-size $MICRO_BATCH_SIZE \ + --global-batch-size $GLOBAL_BATCH_SIZE \ + --train-samples $TRAIN_SAMPLES \ + --vocab-file $VOCAB_FILE \ + --merge-file $MERGE_FILE \ + --clip-grad 1.0 \ + --kill-switch-path $KILL_SWITCH_PATH \ + --bf16 \ + $OPTIMIZER_ARGS \ + " + +OUTPUT_ARGS=" \ + --log-interval 10 \ + --save-interval $SAVE_INTERVAL \ + --eval-interval 1 \ + --eval-iters 100 \ + --tensorboard-dir $TENSORBOARD_PATH \ + --tensorboard-queue-size 5 \ + 
--log-timers-to-tensorboard \ + --log-batch-size-to-tensorboard \ + --log-validation-ppl-to-tensorboard \ + " + +ZERO_STAGE=0 + +mkdir -p ds_configs +DS_CONFIG_PATH="ds_configs/$SLURM_JOB_ID.json" + +cat <<EOF > $DS_CONFIG_PATH +{ + "train_micro_batch_size_per_gpu": $MICRO_BATCH_SIZE, + "train_batch_size": $GLOBAL_BATCH_SIZE, + "gradient_clipping": 1.0, + "zero_optimization": { + "stage": $ZERO_STAGE + }, + "bf16": { + "enabled": true + }, + "steps_per_print": 2000, + "wall_clock_breakdown": false +} +EOF + +DEEPSPEED_ARGS=" \ + --deepspeed \ + --deepspeed_config $DS_CONFIG_PATH \ + --zero-stage $ZERO_STAGE \ + " + +CMD=" \ + Megatron-DeepSpeed/pretrain_gpt.py \ + --tensor-model-parallel-size $TP_SIZE \ + --pipeline-model-parallel-size $PP_SIZE \ + $GPT_ARGS \ + $OUTPUT_ARGS \ + --save $CHECKPOINT_PATH \ + --load $CHECKPOINT_PATH \ + --train-weighted-split-paths-path $TRAIN_DATA_PATH \ + --valid-weighted-split-paths-path $VALID_DATA_PATH \ + --data-impl mmap \ + $DEEPSPEED_ARGS \ + " + +echo $CMD + +echo "START $SLURM_JOBID: $(date)" + +# bash launch_srun.sh $CMD +srun --label launch.sh $CMD + +echo "END $SLURM_JOBID: $(date)" diff --git a/421m3b93b9/tensorboard_421m3b93b9/events.out.tfevents.1675934994.nid006081.105253.0 b/421m3b93b9/tensorboard_421m3b93b9/events.out.tfevents.1675934994.nid006081.105253.0 new file mode 100644 index 0000000000000000000000000000000000000000..a7916f0aba1aa953162526bf501551c26cad6187 --- /dev/null +++ b/421m3b93b9/tensorboard_421m3b93b9/events.out.tfevents.1675934994.nid006081.105253.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4500827d8f2603b8623eebf152a0f0096b47c9d18a922485bda5dea42ae139a6 +size 6563581 diff --git a/421m3b93b9/tensorboard_421m3b93b9/events.out.tfevents.1675976681.nid006025.123403.0 b/421m3b93b9/tensorboard_421m3b93b9/events.out.tfevents.1675976681.nid006025.123403.0 new file mode 100644 index 0000000000000000000000000000000000000000..2659ba2f5f686c462af96ccbb4bdab28e31dd5c0 --- /dev/null +++
b/421m3b93b9/tensorboard_421m3b93b9/events.out.tfevents.1675976681.nid006025.123403.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a588b038c4728df6e8382dc799d968d8b0c6ad90b0f8784cbde9bb9a0626d4aa +size 8032095 diff --git a/421m3b93b9/tensorboard_421m3b93b9val/events.out.tfevents.1675555841.nid005650.110144.0 b/421m3b93b9/tensorboard_421m3b93b9val/events.out.tfevents.1675555841.nid005650.110144.0 new file mode 100644 index 0000000000000000000000000000000000000000..a2b19ecc1371ccd14c366815513850d449f7aa73 --- /dev/null +++ b/421m3b93b9/tensorboard_421m3b93b9val/events.out.tfevents.1675555841.nid005650.110144.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b264742864beeb91b47f4b628c5850d0f800ec0d261c243a281207ab0e25e684 +size 980 diff --git a/421m3b93b9/tensorboard_421m3b93b9val/events.out.tfevents.1675975736.nid006912.2155.0 b/421m3b93b9/tensorboard_421m3b93b9val/events.out.tfevents.1675975736.nid006912.2155.0 new file mode 100644 index 0000000000000000000000000000000000000000..08361c09dc05e85b89a2c1acabd59421244c2de8 --- /dev/null +++ b/421m3b93b9/tensorboard_421m3b93b9val/events.out.tfevents.1675975736.nid006912.2155.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3afc56291552b96a4c8be8165d92bd184721e14ab66fb66233c31d073ef75c6 +size 980 diff --git a/421m3b93b9/tensorboard_421m3b93b9val/events.out.tfevents.1675981016.nid007045.93720.0 b/421m3b93b9/tensorboard_421m3b93b9val/events.out.tfevents.1675981016.nid007045.93720.0 new file mode 100644 index 0000000000000000000000000000000000000000..a4c317740790cc6e813ca9ccc3093e9223128ccc --- /dev/null +++ b/421m3b93b9/tensorboard_421m3b93b9val/events.out.tfevents.1675981016.nid007045.93720.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d599411db4cd06061e1fd9dba010126c9f06530bf47e520791872626e81bcb +size 980 diff --git a/421m3b93b9/transformers/config.json b/421m3b93b9/transformers/config.json new file mode 100644 
index 0000000000000000000000000000000000000000..ea88768d351c626be3e9ca050a00f7a5eb7f522c --- /dev/null +++ b/421m3b93b9/transformers/config.json @@ -0,0 +1 @@ +{"vocab_size": 50304, "n_positions": 2048, "n_embd": 1280, "n_layer": 18, "n_head": 10, "n_inner": 5120, "activation_function": "gelu", "resid_pdrop": 0.1, "embd_pdrop": 0.1, "attn_pdrop": 0.1, "layer_norm_epsilon": 1e-05, "initializer_range": 0.02, "summary_type": "cls_index", "summary_use_proj": true, "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "scale_attn_weights": true, "use_cache": true, "scale_attn_by_inverse_layer_idx": false, "reorder_and_upcast_attn": false, "bos_token_id": 50256, "eos_token_id": 50256, "return_dict": true, "output_hidden_states": false, "output_attentions": false, "torchscript": false, "torch_dtype": null, "use_bfloat16": false, "tf_legacy_loss": false, "pruned_heads": {}, "tie_word_embeddings": true, "is_encoder_decoder": false, "is_decoder": false, "cross_attention_hidden_size": null, "add_cross_attention": false, "tie_encoder_decoder": false, "max_length": 20, "min_length": 0, "do_sample": false, "early_stopping": false, "num_beams": 1, "num_beam_groups": 1, "diversity_penalty": 0.0, "temperature": 1.0, "top_k": 50, "top_p": 1.0, "typical_p": 1.0, "repetition_penalty": 1.0, "length_penalty": 1.0, "no_repeat_ngram_size": 0, "encoder_no_repeat_ngram_size": 0, "bad_words_ids": null, "num_return_sequences": 1, "chunk_size_feed_forward": 0, "output_scores": false, "return_dict_in_generate": false, "forced_bos_token_id": null, "forced_eos_token_id": null, "remove_invalid_values": false, "exponential_decay_length_penalty": null, "suppress_tokens": null, "begin_suppress_tokens": null, "architectures": ["GPT2LMHeadModel"], "finetuning_task": null, "id2label": {"0": "LABEL_0", "1": "LABEL_1"}, "label2id": {"LABEL_0": 0, "LABEL_1": 1}, "tokenizer_class": null, "prefix": null, "pad_token_id": null, "sep_token_id": null, "decoder_start_token_id": 
null, "task_specific_params": null, "problem_type": null, "_name_or_path": "", "transformers_version": "4.25.0.dev0", "n_ctx": 1024, "gradient_checkpointing": false, "model_type": "gpt2"} \ No newline at end of file diff --git a/421m3b93b9/transformers/pytorch_model.bin b/421m3b93b9/transformers/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ccb3bc7695e94afaa0d4539c5061fdbedf96bf0b --- /dev/null +++ b/421m3b93b9/transformers/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71b826ffa7f16baf6942b82543b5a2b49bcf7e82db4f8fa662189d1b53731336 +size 993488781